def compute(learning_rate, momentum, epochs=50):
    """Train a two-layer network on a CIFAR-10 subset and score it on the test split.

    The whole training split is pushed through the model as one batch, so
    every epoch is exactly one SGD-with-momentum update.

    Written against the PyTorch >= 0.4 tensor API: plain tensors are used
    instead of ``Variable`` wrappers and scalars are read with ``.item()``
    (indexing a 0-dim tensor with ``[0]`` raises on current releases).

    Args:
        learning_rate: Step size handed to ``torch.optim.SGD``.
        momentum: Momentum factor handed to ``torch.optim.SGD``.
        epochs: Number of full-batch updates to run.

    Returns:
        tuple: ``(accuracy_percent, correct)``; ``correct`` is the number of
        correctly classified test samples (int) and ``accuracy_percent`` is
        ``100.0 * correct / number_of_test_samples`` (float).
    """
    # The validation split is loaded by the helper but not used here.
    X_train, y_train, _, _, X_test, y_test = du.get_CIFAR10_data(
        num_training=10000, num_validation=10, num_test=1000)

    # Unpacking doubles as a check that the training data is a 2-D array.
    _, D_in = X_train.shape
    H, D_out = 50, 10

    x = torch.from_numpy(X_train).float()
    y = torch.LongTensor(y_train)

    model = TwoLayerNeuralNet(D_in, H, D_out)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,
                                momentum=momentum)

    for _ in range(epochs):
        loss = criterion(model(x), y)
        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    x_test = torch.from_numpy(X_test).float()
    # Keep labels integral so they compare directly with the argmax indices.
    y_test = torch.from_numpy(y_test).long()

    # No gradients are needed for scoring; skip building the autograd graph.
    with torch.no_grad():
        scores = model(x_test)
    # torch.max over dim 1 returns (values, indices); indices are the classes.
    _, predicted = torch.max(scores, 1)

    total_labels = y_test.size(0)
    correct = int((predicted == y_test).sum().item())
    return (100.0 * correct / total_labels, correct)
def main(debug=True):
    """Fit a ``CIFAR_Model`` on CIFAR-10 inside a fresh CPU-pinned TF session.

    The default graph is reset before the model is built.  The positional
    arguments ``5``, ``32`` and ``.6`` are forwarded to ``CIFAR_Model.fit``
    unchanged; their meaning is defined by that method.

    Args:
        debug: Accepted for interface compatibility; not read by this function.
    """
    train_x, train_y, val_x, val_y, _test_x, _test_y = get_CIFAR10_data()

    tf.reset_default_graph()
    net = CIFAR_Model()

    train_set = (train_x, train_y)
    val_set = (val_x, val_y)

    # Session first, device scope second: same nesting as two stacked `with`s.
    with tf.Session() as session, tf.device("/cpu:0"):
        session.run(tf.global_variables_initializer())
        net.fit(session, 5, 32, train_set, val_set, .6)
        print("")
def main():
    """Train the CIFAR-10 model described by the command-line arguments.

    Builds the TensorFlow graph (input placeholders, training-only image
    distortions, optional input batch-norm, model, losses, summaries) and
    runs ``max_steps`` mini-batch updates:

    * every step: batch accuracy / loss scalar summaries;
    * every 100 steps: the merged summary op;
    * every 10 steps: confusion-matrix image (skipped at step 0), accuracy
      histogram and a validation mini-batch evaluation;
    * every 5000 steps and after the last step: a checkpoint.

    The summary writer and the session are closed when training ends,
    whether normally or through an exception.

    Returns:
        int: always 0.

    Raises:
        AssertionError: if the training loss becomes NaN.
    """
    args, remaining = parser.parse_known_args()
    lr = args.learning_rate
    reg_weight = args.regularization_weight
    kp = args.keep_prob
    max_steps = args.max_steps
    decay_rate = args.decay_rate
    lr_decay_time = args.lr_decay_time
    batch_size = args.batch_size
    # The optimizer object itself is currently not used, only its description.
    _unused_optimizer, opt_string = get_optimizer(args, remaining)
    print(opt_string)
    print("Arguments = ", args)

    print("Loading Data;")
    data = get_CIFAR10_data()
    train_size = len(data['y_train'])
    for k, v in data.items():
        print('%s: ' % (k), v.shape)

    # PLACEHOLDER VARIABLES
    keep_prob = tf.placeholder(dtype=tf.float32, shape=())
    # Never fed: the learning rate is passed to train() as a Python value.
    learning_rate = tf.placeholder(dtype=tf.float32, shape=())
    regularizer_weight = tf.placeholder(dtype=tf.float32, shape=())
    is_training = tf.placeholder(dtype=tf.bool, shape=())
    X_image = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
    y_label = tf.placeholder(dtype=tf.int64, shape=[None])

    # Only distort training data.  The distorted tensor gets its own name:
    # X_image must keep pointing at the placeholder, because it is the key
    # used in every feed_dict.  Feeding the *output* of the tf.cond chain
    # (what happened when X_image was rebound) overrides it and silently
    # skips every distortion.
    distortions = (
        tf.image.random_flip_left_right,
        tf.image.random_flip_up_down,
        lambda img: tf.image.random_brightness(img, max_delta=60),
        lambda img: tf.image.random_contrast(img, lower=0.2, upper=1.8),
    )
    augmented_images = X_image
    for distort in distortions:
        # Defaults bind the current loop values to the branch callables.
        augmented_images = tf.cond(
            is_training,
            lambda fn=distort, imgs=augmented_images: tf.map_fn(fn, imgs),
            lambda imgs=augmented_images: imgs)

    # MODEL related operations and values
    global_step = tf.Variable(0, trainable=False)
    use_batchnorm = args.batch_norm
    b_norm_images = tf.contrib.layers.batch_norm(
        inputs=augmented_images, center=True, scale=True, decay=0.99,
        data_format="NHWC", is_training=is_training, scope="input",
        updates_collections=None)
    # use_batchnorm is a Python value, so the choice is made at graph-build
    # time; no graph-level select op is needed.  The identity test against
    # True is kept from the original condition.
    images = b_norm_images if use_batchnorm is True else augmented_images

    # MODEL construction
    logits, grad_image, grad_image_placeholder, last_layer = inference(
        images, keep_prob=keep_prob, regularizer_weight=regularizer_weight,
        is_training=is_training)
    prediction = predict(logits)
    loss_op = loss(logits, y_label)
    reg_loss = tf.reduce_sum(tf.get_collection(LOSSES_COLLECTION))
    total_loss = loss_op + reg_loss
    accuracy_op = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(logits, 1), y_label), tf.float32))

    decay_steps = lr_decay_time * ((train_size // batch_size) + 1)
    print("Number of batch steps till lr_decay = ", decay_steps)
    train_op = train(total_loss, global_step, learning_rate=lr,
                     lr_rate_decay_factor=decay_rate, decay_steps=decay_steps)
    saver = tf.train.Saver(tf.global_variables())

    # Summary operations.  merge_all() is called *before* the summaries below
    # are created, so summary_op holds only what exists at this point; the
    # later summaries are run individually with their own feeds.
    tf.summary.image('images', augmented_images)
    summary_op = tf.summary.merge_all()
    acc_summary = tf.summary.scalar('Training_accuracy_batch', accuracy_op)
    validation_acc_summary = tf.summary.scalar('Validation_accuracy', accuracy_op)
    cross_entropy_loss = tf.summary.scalar('loss_raw', loss_op)
    reg_loss_summary = tf.summary.scalar('regularization_loss', reg_loss)
    total_loss_summary = tf.summary.scalar('total_loss', total_loss)
    accuracy_batch = tf.placeholder(shape=(None), dtype=tf.float32)
    overfit_estimate = tf.placeholder(shape=(None), dtype=tf.float32)
    accuracy_100 = tf.reduce_mean(accuracy_batch)
    mean_summary = tf.summary.scalar('Training_accuracy_mean', accuracy_100)
    validation_mean_summary = tf.summary.scalar('Validation_accuracy_mean', accuracy_100)
    acc_summary_histogram = tf.summary.histogram('Training_accuracy_histogram', accuracy_batch)
    overfit_summary = tf.summary.scalar('overfit_estimate', overfit_estimate)

    # SESSION construction
    init = tf.global_variables_initializer()
    config = tf.ConfigProto()
    config.log_device_placement = False
    sess = tf.Session(config=config)
    sess.run(init)

    # One timestamp names both the run directory and the checkpoint files.
    current_time = datetime.now()
    run_stamp = "%s_%d_%d-h%dm%d" % (
        current_time.strftime("%B"), current_time.day, current_time.year,
        current_time.hour, current_time.minute)
    train_dir = "/".join([
        "cifar10_results/l1_layer",
        "bn_" + str(int(use_batchnorm)),
        "LR_" + str(lr),
        "REG_" + str(reg_weight),
        "KP_" + str(kp),
        run_stamp,
    ])
    print("Writing summary data to : ", train_dir)

    acc_list = []
    valid_acc_list = []
    cm_placeholder = tf.placeholder(shape=(1, None, None, 4), dtype=tf.uint8)
    confusion_summary = tf.summary.image('confusion_matrix', cm_placeholder)
    layer_output_placeholder = tf.placeholder(shape=(3, None, None, 1), dtype=tf.uint8)
    layer_summary = tf.summary.image('layer_summary', layer_output_placeholder)
    print(last_layer.get_shape())

    summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
    print("Starting Training.")
    print("Training for %d batches (of size %d); initial learning rate %f" % (max_steps, batch_size, lr))

    num_train = data['X_train'].shape[0]
    format_str = ('{0}: step {1:>5d}, loss = {2:2.3f}, accuracy = {3:>3.2f}, accuracy (val) = {4:>3.2f}, loss = {5:2.3f}')
    try:
        for step in range(max_steps):
            # An epoch boundary is crossed when the number of samples seen
            # so far passes another multiple of the training-set size.
            if batch_size * (step - 1) // num_train < batch_size * (step) // num_train and step > 0:
                print("Completed Epoch: %d (step=%d, max_steps=%d, percentage complete= %f)" % ((batch_size * (step) // num_train), step, max_steps, step/max_steps * 100))

            # Mini-batches are sampled with replacement.
            batch_mask = np.random.choice(num_train, batch_size)
            X_batch = data['X_train'][batch_mask]
            y_batch = data['y_train'][batch_mask]
            feed_dict = {
                X_image: X_batch,
                y_label: y_batch,
                keep_prob: kp,
                regularizer_weight: reg_weight,
                is_training: True,
            }

            # Metrics are evaluated first; the update is a separate run.
            loss_value, accuracy, acc_str, xentropy_str, reg_loss_str, predicted_class = sess.run(
                [total_loss, accuracy_op, acc_summary, cross_entropy_loss,
                 reg_loss_summary, prediction],
                feed_dict=feed_dict)
            sess.run(train_op, feed_dict=feed_dict)

            # Running mean over the 100 most recent training batches.
            acc_list.append(accuracy)
            acc_list = acc_list[-100:]
            accuracy_100_str = sess.run(mean_summary, feed_dict={accuracy_batch: np.array(acc_list)})

            summary_writer.add_summary(acc_str, step)
            summary_writer.add_summary(xentropy_str, step)
            summary_writer.add_summary(reg_loss_str, step)
            summary_writer.add_summary(accuracy_100_str, step)

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 100 == 0:
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)

            if step % 10 == 0:
                if step > 0:
                    confusion_img = plot_confusion_matrix(
                        confusion_matrix(y_batch, predicted_class),
                        title='Confusion matrix', cmap=plt.cm.Blues, labels=classes)
                    summary_writer.add_summary(
                        confusion_summary.eval(session=sess, feed_dict={cm_placeholder: confusion_img}),
                        step)
                    del confusion_img

                acc_summary_histogram_out = sess.run(
                    acc_summary_histogram,
                    feed_dict={accuracy_batch: np.array(acc_list)})
                summary_writer.add_summary(acc_summary_histogram_out, step)

                # Validate on a random mini-batch with dropout and
                # regularisation switched off.
                num_valid = data['X_val'].shape[0]
                batch_valid_mask = np.random.choice(num_valid, batch_size)
                X_val_batch = data['X_val'][batch_valid_mask]
                y_val_batch = data['y_val'][batch_valid_mask]
                valid_dict = {
                    X_image: X_val_batch,
                    y_label: y_val_batch,
                    keep_prob: 1.0,
                    regularizer_weight: 0.00,
                    is_training: False,
                }
                valid_summary, valid_acc, valid_loss = sess.run(
                    [validation_acc_summary, accuracy_op, loss_op],
                    feed_dict=valid_dict)
                valid_acc_list.append(valid_acc)
                valid_acc_list = valid_acc_list[-100:]
                valid_accuracy_100_str = sess.run(
                    validation_mean_summary,
                    feed_dict={accuracy_batch: np.array(valid_acc_list)})
                print(format_str.format(datetime.now(), step, loss_value, 100*accuracy, 100*valid_acc, valid_loss))

                overfit_summary_str = sess.run(
                    overfit_summary,
                    feed_dict={overfit_estimate: accuracy - valid_acc})
                summary_writer.add_summary(overfit_summary_str, step)
                summary_writer.add_summary(valid_summary, step)
                summary_writer.add_summary(valid_accuracy_100_str, step)

            if (step % 5000 == 0 and step > 0) or (step + 1) == max_steps:
                checkpoint_path = os.path.join(train_dir, run_stamp + 'model.ckpt')
                print("Checkpoint path = ", checkpoint_path)
                saver.save(sess, checkpoint_path, global_step=step, write_meta_graph=True)
    finally:
        # Flush buffered events and release session resources even when
        # training aborts (e.g. on the NaN assertion).
        summary_writer.close()
        sess.close()
    return 0
# Plot how the training loss evolved over the iterations.
loss_history = stats['loss_history']
plt.figure(1)
plt.plot(loss_history)
plt.title('Training Loss history')
plt.xlabel('iteration')
plt.ylabel('training loss')
plt.show()

# Load the data.
# With a two-layer network that passes gradient checks and works on toy data,
# switch to the real CIFAR-10 dataset for training a classifier.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
for split_name, split_images, split_labels in (
        ('Train', X_train, y_train),
        ('Validation', X_val, y_val),
        ('Test', X_test, y_test)):
    print('%s data shape: ' % split_name, split_images.shape)
    print('%s labels shape: ' % split_name, split_labels.shape)

# Show the first 100 training images as one grid to get a feel for the data.
sample_images = X_train[:100, :].reshape(100, 32, 32, 3)
image_grid = visualize_grid(sample_images, padding=3)
plt.figure(2)
plt.imshow(image_grid.astype('uint8'))
plt.gca().axis('off')
plt.show()
import matplotlib.pyplot as plt
from cnn import *
from data_utils import get_CIFAR10_data
from solver import Solver

# Train a ThreeLayerConvNet on CIFAR-10 with SGD + momentum.
data = get_CIFAR10_data()
model = ThreeLayerConvNet(reg=0.9)

optim_settings = {
    'learning_rate': 5e-4,
    'momentum': 0.9
}
solver_settings = dict(
    lr_decay=0.95,
    print_every=10,
    num_epochs=5,
    batch_size=2,
    update_rule='sgd_momentum',
    optim_config=optim_settings,
)
solver = Solver(model, data, **solver_settings)
solver.train()

# Top panel: per-iteration training loss.
plt.subplot(2, 1, 1)
plt.title('Training loss')
plt.plot(solver.loss_history, 'o')
plt.xlabel('Iteration')

# Bottom panel: train / validation accuracy with a dashed line at 0.5.
plt.subplot(2, 1, 2)
plt.title('Accuracy')
for curve_label, curve in (('train', solver.train_acc_history),
                           ('val', solver.val_acc_history)):
    plt.plot(curve, '-o', label=curve_label)
reference_line = [0.5] * len(solver.val_acc_history)
plt.plot(reference_line, 'k--')
test_image = np.array(ndimage.imread(buf)) plt.close() return test_image[np.newaxis, :] #CUDA_VISIBLE_DEVICES=0,1 #CUDA_VISIBLE_DEVICES=0 #CUDA_VISIBLE_DEVICES=1 #CUDA_VISIBLE_DEVICES="" config = tf.ConfigProto(device_count={'GPU': 0}) with tf.device('/cpu:0'): sess = tf.Session(config=config) data = get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=5000) print(len(data['y_test'])) X_image = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3]) y_label = tf.placeholder(dtype=tf.int64, shape=[None]) logits, grad_image, grad_image_placeholder = inference(X_image) top_k_op = tf.nn.in_top_k(predictions=logits, targets=y_label, k=1) ckpt = tf.train.get_checkpoint_state( './cifar10_results/LR_0.03/REG_0.11/KP_0.9/January_14_2017-h17m55/') saver = tf.train.Saver() if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) array = sess.run(top_k_op,
import data_utils as du
import matplotlib.pyplot as plt

# Load CIFAR-10 and report the training array's shape, then its first dimension.
dataDict = du.get_CIFAR10_data()
xTrain = dataDict["X_train"]
train_shape = xTrain.shape
print(train_shape)
print(train_shape[0])
import tensorflow as tf
import numpy as np
import math
import data_utils
import cv2
import matplotlib.pyplot as plt

# Load the CIFAR-10 dataset. get_CIFAR10_data() is called with its defaults
# here; pass the directory of your local CIFAR-10 copy if it lives elsewhere.
data = data_utils.get_CIFAR10_data()
X_train = data['X_train']
y_train = data['y_train']
X_val = data['X_val']
y_val = data['y_val']
X_test = data['X_test']
y_test = data['y_test']
print(X_train.shape)


def neural_net_image_input(image_shape):
    """
    Return a placeholder Tensor for a batch of input images.

    : image_shape: Shape of a single image; its first three entries are used
                   as the non-batch dimensions of the placeholder.
    : return: float32 placeholder named 'x' with shape
              [None, image_shape[0], image_shape[1], image_shape[2]]
              (the leading None leaves the batch size open).
    """
    return tf.placeholder(
        tf.float32,
        [None, image_shape[0], image_shape[1], image_shape[2]],
        name='x')


def batch_norm(x_tensor, name=None):
    # Moments are reduced over axis 0 only.
    mean, variance = tf.nn.moments(x_tensor, axes=[0])
def get_data(path='.//cifar-10-batches-py//'):
    """Load CIFAR-10 through ``data_utils.get_CIFAR10_data``.

    Args:
        path: Directory holding the CIFAR-10 batches; forwarded as the
            ``cifar10_dir`` keyword argument.

    Returns:
        Whatever ``data_utils.get_CIFAR10_data`` returns for that directory.
    """
    dataset = data_utils.get_CIFAR10_data(cifar10_dir=path)
    return dataset
def main():
    """Train the CIFAR-10 model with the module-level hyper-parameters.

    Builds the graph (placeholders, model, losses, summaries), then runs
    ``MAX_STEPS`` mini-batch updates of size ``BATCH_SIZE``:

    * every step: batch accuracy / loss scalar summaries;
    * every 100 steps: the merged summary op;
    * every 10 steps: confusion-matrix image (skipped at step 0), accuracy
      histogram and an evaluation on the full validation split;
    * every 500 steps and after the last step: a checkpoint.

    The confusion-matrix summary ops are created once and fed the rendered
    PNG through a placeholder, so the graph does not grow during training.
    The summary writer and the session are closed when training ends,
    whether normally or through an exception.

    Returns:
        int: always 0.

    Raises:
        AssertionError: if the training loss becomes NaN.
    """
    print("Loading Data;")
    data = get_CIFAR10_data()
    for k, v in data.items():
        print('%s: ' % (k), v.shape)

    # PLACEHOLDER VARIABLES
    keep_prob = tf.placeholder(dtype=tf.float32, shape=())
    # Not used (currently): the learning rate is passed to train() directly.
    learning_rate = tf.placeholder(dtype=tf.float32, shape=())
    regularizer_weight = tf.placeholder(dtype=tf.float32, shape=())
    X_image = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
    y_label = tf.placeholder(dtype=tf.int64, shape=[None])

    # MODEL related operations and values
    global_step = tf.Variable(0, trainable=False)

    # MODEL construction
    logits = inference(X_image, keep_prob=keep_prob, regularizer_weight=regularizer_weight)
    prediction = predict(logits)
    loss_op = loss(logits, y_label)
    reg_loss = tf.reduce_sum(tf.get_collection(LOSSES_COLLECTION))
    total_loss = loss_op + reg_loss
    accuracy_op = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(logits, 1), y_label), tf.float32))
    # NOTE(review): the optimiser is given loss_op, not total_loss, so
    # reg_loss is only logged unless train() adds it itself -- confirm.
    train_op = train(loss_op, global_step, learning_rate=INITIAL_LEARNING_RATE)
    saver = tf.train.Saver(tf.all_variables())

    # Summary operations.  merge_all_summaries() is called *before* the
    # summaries below are created, so summary_op holds only what exists at
    # this point; the later summaries are run individually.
    tf.image_summary('images', X_image)
    summary_op = tf.merge_all_summaries()
    acc_summary = tf.scalar_summary('Training_accuracy (Batch)', accuracy_op)
    validation_acc_summary = tf.scalar_summary('Validation_accuracy', accuracy_op)
    cross_entropy_loss = tf.scalar_summary('loss_raw', loss_op)
    reg_loss_summary = tf.scalar_summary('regularization_loss', reg_loss)
    total_loss_summary = tf.scalar_summary('total_loss', total_loss)
    accuracy_batch = tf.placeholder(shape=(None), dtype=tf.float32)
    accuracy_100 = tf.reduce_mean(accuracy_batch)
    mean_summary = tf.scalar_summary('Training_accuracy (Mean)', accuracy_100)
    validation_mean_summary = tf.scalar_summary('Validation_accuracy (Mean)', accuracy_100)
    acc_histogram_summary = tf.histogram_summary('Training_accuracy (Histogram)', accuracy_batch)

    # Confusion-matrix image summary, built ONCE.  The encoded PNG is fed
    # per evaluation; creating decode/summary ops inside the training loop
    # would add new nodes (and the PNG bytes as constants) every time.
    confusion_png = tf.placeholder(dtype=tf.string, shape=())
    confusion_image = tf.expand_dims(
        tf.image.decode_png(confusion_png, channels=4), 0)
    confusion_summary = tf.image_summary('confusion_matrix', confusion_image)

    # SESSION construction
    init = tf.initialize_all_variables()
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(init)

    # One timestamp names both the run directory and the checkpoint files.
    current_time = datetime.now()
    run_stamp = "%s_%d_%d-h%dm%d" % (
        current_time.strftime("%B"), current_time.day, current_time.year,
        current_time.hour, current_time.minute)
    train_dir = "/".join([
        "cifar10_results",
        "LR_" + str(INITIAL_LEARNING_RATE),
        "REG_" + str(DEFAULT_REG_WEIGHT),
        run_stamp,
    ])
    print("Writing summary data to : ", train_dir)
    summary_writer = tf.train.SummaryWriter(train_dir, sess.graph)

    acc_list = []
    valid_acc_list = []
    print("Starting Training.")
    print("Training for %d batches (of size %d); initial learning rate %f" % (MAX_STEPS, BATCH_SIZE, INITIAL_LEARNING_RATE))

    num_train = data['X_train'].shape[0]
    format_str = ('{0}: step {1:>5d}, loss = {2:2.3f}, accuracy = {3:>3.2f}, accuracy (validation) = {4:>3.2f}')
    try:
        for step in range(MAX_STEPS):
            # An epoch boundary is crossed when the number of samples seen
            # so far passes another multiple of the training-set size.
            if BATCH_SIZE * (step - 1) // num_train < BATCH_SIZE * (step) // num_train and step > 0:
                print("Completed Epoch: %d (step=%d, MAX_STEPS=%d, percentage complete= %f)" % ((BATCH_SIZE * (step) // num_train), step, MAX_STEPS, step/MAX_STEPS * 100))

            # Mini-batches are sampled with replacement.
            batch_mask = np.random.choice(num_train, BATCH_SIZE)
            X_batch = data['X_train'][batch_mask]
            y_batch = data['y_train'][batch_mask]
            feed_dict = {
                X_image: X_batch,
                y_label: y_batch,
                keep_prob: 0.8,
                regularizer_weight: 0.01,
            }

            # Metrics are evaluated first; the update is a separate run.
            loss_value, accuracy, acc_str, xentropy_str, reg_loss_str, predicted_class = sess.run(
                [total_loss, accuracy_op, acc_summary, cross_entropy_loss,
                 reg_loss_summary, prediction],
                feed_dict=feed_dict)
            sess.run(train_op, feed_dict=feed_dict)

            # Running mean over the 100 most recent training batches.
            acc_list.append(accuracy)
            accuracy_100_str = sess.run(
                mean_summary,
                feed_dict={accuracy_batch: np.array(acc_list[-100:])})

            summary_writer.add_summary(acc_str, step)
            summary_writer.add_summary(xentropy_str, step)
            summary_writer.add_summary(reg_loss_str, step)
            summary_writer.add_summary(accuracy_100_str, step)

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 100 == 0:
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)

            if step % 10 == 0:
                if step > 0:
                    confusion_buf = plot_confusion_matrix(
                        confusion_matrix(y_batch, predicted_class),
                        title='Confusion matrix', cmap=plt.cm.Blues, labels=classes)
                    confusion_str = sess.run(
                        confusion_summary,
                        feed_dict={confusion_png: confusion_buf.getvalue()})
                    summary_writer.add_summary(confusion_str, step)
                    plt.close()

                histogram_summary_out = sess.run(
                    acc_histogram_summary,
                    feed_dict={accuracy_batch: np.array(acc_list[-100:])})
                summary_writer.add_summary(histogram_summary_out, step)

                # Evaluate on the whole validation split with dropout and
                # regularisation switched off.
                X_val_batch = data['X_val']
                y_val_batch = data['y_val']
                valid_dict = {
                    X_image: X_val_batch,
                    y_label: y_val_batch,
                    keep_prob: 1.0,
                    regularizer_weight: 0.00,
                }
                valid_summary, valid_acc = sess.run(
                    [validation_acc_summary, accuracy_op], feed_dict=valid_dict)
                valid_acc_list.append(valid_acc)
                # Validation mean uses a shorter window (last 10 evaluations).
                valid_accuracy_100_str = sess.run(
                    validation_mean_summary,
                    feed_dict={accuracy_batch: np.array(valid_acc_list[-10:])})
                print(format_str.format(datetime.now(), step, loss_value, accuracy*100, 100*valid_acc))
                summary_writer.add_summary(valid_summary, step)
                summary_writer.add_summary(valid_accuracy_100_str, step)

            if (step % 500 == 0 and step > 0) or (step + 1) == MAX_STEPS:
                checkpoint_path = os.path.join(train_dir, run_stamp + 'model.ckpt')
                print("Checkpoint path = ", checkpoint_path)
                saver.save(sess, checkpoint_path, global_step=step)
    finally:
        # Flush buffered events and release session resources even when
        # training aborts (e.g. on the NaN assertion).
        summary_writer.close()
        sess.close()
    return 0
.format(total_loss, total_correct, e+1)) if plot_losses: plt.plot(losses) plt.grid(True) plt.title('training losses') plt.xlabel('iteration number') plt.ylabel('loss') plt.show() return total_loss, total_correct if __name__ == '__main__': X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000) # convert to gray image X_train_gray = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in X_train]).reshape([-1, 32, 32, 1]) X_val_gray = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in X_val]).reshape([-1, 32, 32, 1]) X_test_gray = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in X_test]).reshape([-1, 32, 32, 1]) # subtract the mean value mean_img = np.mean(X_train, axis=0).astype(np.float32) X_train = X_train.astype(np.float32) - mean_img X_val = X_val.astype(np.float32) - mean_img X_test = X_test.astype(np.float32) - mean_img mean_img_gray = np.mean(X_train_gray, axis=0).astype(np.float32) X_train_gray = X_train_gray.astype(np.float32) - mean_img_gray X_val_gray = X_val_gray.astype(np.float32) - mean_img_gray X_test_gray = X_test_gray.astype(np.float32) - mean_img_gray
"""
Derivation of the softmax backpropagation gradient:
https://blog.csdn.net/abc13526222160/article/details/84968161
"""
import matplotlib.pyplot as plt
from fc_net import *
from data_utils import get_CIFAR10_data
from solver import Solver

# Load a small CIFAR-10 subset; the commented-out values are a larger
# alternative configuration.
data = get_CIFAR10_data(
    # num_training=30000,
    # num_validation=5000,
    # num_test=5000,
    # train_files=['./data/data_batch_1', './data/data_batch_2', './data/data_batch_3', './data/data_batch_4'],
    # test_file='data/test_batch'
    num_training=500,  # number of training samples
    num_validation=50,  # number of validation samples taken from the training set
    num_test=50,  # number of test samples
    train_files=['./data/data_batch_1'],  # training data files
    test_file='data/test_batch'  # test data file
)
model = TwoLayerNet(reg=0.9)  # regularization strength 0.9
solver = Solver(
    model, data,
    # lr_decay=0.95,
    # print_every=100,
    # num_epochs=40,
    # batch_size=400,
    # update_rule='sgd_momentum',
return X, y.astype(int) if __name__ == '__main__': input_size = 4 hidden_size = 10 num_classes = 10 num_input = 5 # net = init_toy_model(input_size, hidden_size, num_classes) # net = FullConnectedNet(input_size, [100], num_classes) net = ConvolutionalNet(input_size=(32, 32, 3), layer_size=[(5, 3, 3)], output_size=num_classes) # X, y = init_toy_data(num_input, num_classes) X, y, _, _, _, _ = get_CIFAR10_data(5, 5, 5) '''matrix_y = np.zeros([num_input, num_classes]) matrix_y[range(num_input), y] = 1 net.forward(X, matrix_y) net.backward(1) grad = net.layers[-3].dW W = net.layers[-3].W f = lambda w: bn_test_net(net, w, X, matrix_y) gradient_check_sparse(f, W, grad)''' def f(W): net.layers[0].W = W num_data = X.shape[0] label_mat = np.zeros([num_data, num_classes]) label_mat[range(num_data), y] = 1 net.forward(X, label_mat) return net.loss
import numpy as np
import utils as ut
import nn
import layers
import loss as ls
import optim
import data_utils as dutil

# Global variables shared by the tests below.
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = dutil.get_CIFAR10_data(
)
n = X_train.shape[1]  # second dimension of the training array, used as the layer input size
c = 10  # output size of the linear layer
Y_dev_enc = ut.encode_labels(y_dev)  # dev labels passed through ut.encode_labels


def test_CrossEntropyLoss():
    """Check the initial cross-entropy loss of a single linear layer.

    Weights and biases are drawn at a scale of 1e-4 with a fixed seed and
    the loss on the dev split is compared with -log(0.1), within 1e-2.
    Presumably this is the loss of a near-uniform prediction over the
    c = 10 outputs -- confirm against ls.CrossEntropy.
    """
    np.random.seed(1)
    W = np.random.randn(c, n) * 0.0001
    b = np.random.randn(c, 1) * 0.0001
    # The layer receives the transposed weights and a flattened bias.
    layer_lin = layers.Linear(n, c, init_vals=(W.T, b.ravel()))
    loss_func = ls.CrossEntropy()
    net = nn.Network([layer_lin], loss_func, optimizer=None)
    my_loss = net.loss(X_dev, Y_dev_enc)
    assert (np.isclose(my_loss, -np.log(.1), atol=1e-2))


def test_CrossEntropy_Linear_Grad():
    np.random.seed(1)
    W = np.random.randn(c, n) * 0.0001
    b = np.random.randn(c, 1) * 0.0001