def setUp(self):
    fname = '../experiment/mnist.cnn'
    self.convnet = ConvNet.load(fname)
    mnist_filename = '../data/mnist.pkl'
    fin = open(mnist_filename, 'rb')
    tr, va, te = cPickle.load(fin)
    fin.close()
    training_set = np.vstack((tr[0], va[0]))
    training_label = np.hstack((tr[1], va[1]))
    test_set, test_label = te[0], te[1]
    training_size = training_set.shape[0]
    test_size = test_set.shape[0]
    # Convert label data type into int32
    training_label = training_label.astype(np.int32)
    test_label = test_label.astype(np.int32)
    # Check
    pprint('Dimension of Training data set: (%d, %d)' % training_set.shape)
    pprint('Dimension of Test data set: (%d, %d)' % test_set.shape)
    # Shuffle
    train_rand_shuffle = np.random.permutation(training_size)
    test_rand_shuffle = np.random.permutation(test_size)
    training_set = training_set[train_rand_shuffle, :]
    training_label = training_label[train_rand_shuffle]
    test_set = test_set[test_rand_shuffle, :]
    test_label = test_label[test_rand_shuffle]
    self.test_set = test_set.reshape((10000, 1, 28, 28))
    self.test_label = test_label

def evaluate(eval_images, eval_labels):
    # Evaluation graph
    input_x = tf.placeholder(tf.float32, shape=[None, 28 * 28], name='input_x')
    input_y = tf.placeholder(tf.float32, shape=[None, 10], name='input_y')
    keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
    model = ConvNet(input_x, input_y, keep_prob)

    saver = tf.train.Saver()
    sess = tf.Session()
    saver.restore(sess, tf.train.latest_checkpoint('saved'))

    with sess.as_default():
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(model.output, 1),
                                          tf.argmax(model.input_y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        feed_dict = {
            model.input_x: eval_images,
            model.input_y: eval_labels,
            model.dropout_keep_prob: 1.0
        }
        acc = sess.run(accuracy, feed_dict=feed_dict)
    return acc

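# Hypothetical usage sketch (names assumed, not part of the original script).
# As the placeholders above show, `evaluate` expects flattened float images of
# shape [N, 784] and one-hot labels of shape [N, 10]:
#
#   eval_images = images.reshape(-1, 28 * 28).astype('float32')
#   eval_labels = np.eye(10, dtype='float32')[digit_labels]   # one-hot encode integer labels
#   print('held-out accuracy: {:g}'.format(evaluate(eval_images, eval_labels)))
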
def train(batch_size=50, epochs=2500, learning_rate=1e-4, dropout_keep_prob=0.5):
    input_x = tf.placeholder(tf.float32, shape=[None, 28 * 28], name='input_x')
    input_y = tf.placeholder(tf.float32, shape=[None, 10], name='input_y')
    keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
    model = ConvNet(input_x, input_y, keep_prob)
    output = model.output

    # Cost function
    with tf.name_scope("cross_entropy"):
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=input_y))

    # Optimisation function
    with tf.name_scope("optimizer"):
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    # Get accuracy of model
    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(input_y, 1), tf.argmax(output, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Start TensorFlow session
    saver = tf.train.Saver()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    display_step = 1
    train_images, train_labels = data_helpers.load_data_and_labels('data/train.csv')

    with sess.as_default():
        batches = data_helpers.batch_iter(
            list(zip(train_images, train_labels)), batch_size, epochs)
        for step, batch in enumerate(batches):
            batch_xs, batch_ys = zip(*batch)
            feed_dict = {
                input_x: batch_xs,
                input_y: batch_ys,
                keep_prob: dropout_keep_prob
            }
            loss, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
            if step % display_step == 0 or (step + 1) == epochs:
                time_str = datetime.datetime.now().isoformat()
                saver.save(sess, "saved/model.ckpt", step)
                acc = sess.run(accuracy,
                               feed_dict={
                                   input_x: batch_xs,
                                   input_y: batch_ys,
                                   keep_prob: 1.0
                               })
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, acc))
                if step % (display_step * 10) == 0 and step:
                    display_step *= 10

def main():
    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Data preprocessing for a neural network with fully-connected layers
    # data = {
    #     'X_train': np.array(x_train[:55000], np.float32).reshape((55000, -1)),  # training data
    #     'y_train': np.array(y_train[:55000], np.int32),                         # training labels
    #     'X_val': np.array(x_train[55000:], np.float32).reshape((5000, -1)),     # validation data
    #     'y_val': np.array(y_train[55000:], np.int32),                           # validation labels
    # }
    # model = SoftmaxClassifier(hidden_dim=10000)

    # Data preprocessing for a neural network with convolutional layers
    data = {
        'X_train': np.array(x_train[:55000], np.float32).reshape((55000, 1, 28, 28)),  # training data
        'y_train': np.array(y_train[:55000], np.int32),                                # training labels
        'X_val': np.array(x_train[55000:], np.float32).reshape((5000, 1, 28, 28)),     # validation data
        'y_val': np.array(y_train[55000:], np.int32),                                  # validation labels
    }
    model = ConvNet(hidden_dim=100, filter_size=9)

    # Running experiments with a convolutional neural network can be time-consuming:
    # you may use a small number of training samples for debugging and then use all
    # of the training data to report your experimental results.
    solver = Solver(model,
                    data,
                    update_rule='sgd',
                    optim_config={'learning_rate': 1e-3},
                    lr_decay=0.95,
                    num_epochs=10,
                    batch_size=100,
                    print_every=10)
    solver.train()

    # Plot the training losses (save before show, so the figure is not cleared first)
    plt.plot(solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.savefig('loss.png')
    plt.show()
    plt.close()

    # test_acc = solver.check_accuracy(X=np.array(x_test, np.float32).reshape((10000, -1)), y=y_test)
    test_acc = solver.check_accuracy(
        X=np.array(x_test, np.float32).reshape((10000, 1, 28, 28)), y=y_test)
    print('Test accuracy', test_acc)

def runCNN_multiclass():
    f = gzip.open('../mnist.pkl.gz', 'rb')
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
    f.close()

    exp_name = sys.argv[1]
    if len(sys.argv) > 2:
        cont_exp = sys.argv[2]
    else:
        cont_exp = None
    os.makedirs(exp_name, exist_ok=True)
    logger = open("./" + exp_name + "/log", "w")

    datapack = (np.concatenate((train_set[0], valid_set[0]), axis=0),
                np.concatenate((train_set[1], valid_set[1]), axis=0))
    data = {
        'X_train': datapack[0][:55000],  # training data
        'y_train': datapack[1][:55000],  # training labels
        'X_val': datapack[0][55000:],    # validation data
        'y_val': datapack[1][55000:]     # validation labels
    }

    ConvConfig = {
        'input_dim': (1, 28, 28),
        'num_filters': 32,
        'filter_size': 7,
        'hidden_dim': 100,
        'num_classes': 10,
        'weight_scale': 1e-3,
        'reg': 0.,
        'bn': True,
        'dropout': True,
        'cont_exp': cont_exp
    }
    logger.write(str(ConvConfig) + '\n')

    model = ConvNet(**ConvConfig)
    solver = Solver(model,
                    data,
                    logger,
                    update_rule='adam',
                    optim_config={'learning_rate': 0.001},
                    lr_decay=0.95,
                    num_epochs=5,
                    batch_size=100,
                    print_every=10,
                    exp_name=exp_name)
    solver.train()

    test_acc = solver.check_accuracy(test_set[0], test_set[1])
    toprint = "test_acc: " + str(test_acc)
    print(toprint)
    logger.write(toprint)
    logger.flush()

def testCNN(self):
    conf_filename = './sentiment_cnn.conf'
    # Build the architecture of CNN
    start_time = time.time()
    configer = CNNConfiger(conf_filename)
    convnet = ConvNet(configer, verbose=True)
    end_time = time.time()
    pprint('Time used to build the architecture of CNN: %f seconds' % (end_time - start_time))
    # Training
    learn_rate = 0.5
    batch_size = configer.batch_size
    num_batches = self.train_size / batch_size
    start_time = time.time()
    for i in xrange(configer.nepoch):
        right_count = 0
        tot_cost = 0
        # rate = learn_rate
        rate = learn_rate / (i / 100 + 1)
        for j in xrange(num_batches):
            minibatch = self.senti_train_set[j * batch_size:(j + 1) * batch_size, :]
            minibatch = minibatch.reshape((batch_size, 1, configer.image_row, configer.image_col))
            label = self.senti_train_label[j * batch_size:(j + 1) * batch_size]
            cost, accuracy = convnet.train(minibatch, label, rate)
            prediction = convnet.predict(minibatch)
            right_count += np.sum(label == prediction)
            tot_cost += cost
            # pprint('Epoch %d, batch %d, cost = %f, local accuracy: %f' % (i, j, cost, accuracy))
        accuracy = right_count / float(self.train_size)
        pprint('Epoch %d, total cost: %f, overall accuracy: %f' % (i, tot_cost, accuracy))
    ConvNet.save('./sentiment.cnn', convnet)
    end_time = time.time()
    pprint('Time used to train CNN on Sentiment analysis task: %f minutes.' %
           ((end_time - start_time) / 60))
    # Test
    num_batches = self.test_size / batch_size
    right_count = 0
    for i in xrange(num_batches):
        minibatch = self.senti_test_set[i * batch_size:(i + 1) * batch_size, :]
        minibatch = minibatch.reshape((batch_size, 1, configer.image_row, configer.image_col))
        label = self.senti_test_label[i * batch_size:(i + 1) * batch_size]
        prediction = convnet.predict(minibatch)
        right_count += np.sum(prediction == label)
    test_accuracy = right_count / float(self.test_size)
    pprint('Test set accuracy: %f' % test_accuracy)

def inference(test_images):
    # Evaluation graph
    input_x = tf.placeholder(tf.float32, shape=[None, 28 * 28], name='input_x')
    input_y = tf.placeholder(tf.float32, shape=[None, 10], name='input_y')
    keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
    model = ConvNet(input_x, input_y, keep_prob)

    saver = tf.train.Saver()
    sess = tf.Session()
    saver.restore(sess, tf.train.latest_checkpoint('saved'))

    with sess.as_default():
        # Prediction function
        with tf.name_scope('predict'):
            # [0.1, 0.9, 0.2, 0.1, 0.1, 0.3, 0.5, 0.1, 0.2, 0.3] => 1
            predict = tf.argmax(model.output, 1)
        feed_dict = {model.input_x: test_images, model.dropout_keep_prob: 1.0}
        predictions = sess.run(predict, feed_dict=feed_dict)
    return predictions

import os, sys
import cPickle
import time
from pprint import pprint

import numpy as np

sys.path.append('../source/')
from cnn import ConvNet
from config import CNNConfiger

np.random.seed(42)

mnist_filename = '../data/mnist.pkl'
conf_filename = './mnist.conf'

# Build architecture of CNN from the configuration file
start_time = time.time()
configer = CNNConfiger(conf_filename)
convnet = ConvNet(configer, verbose=True)
end_time = time.time()
pprint('Time used to build the architecture of CNN: %f seconds' % (end_time - start_time))

# Load data and train via minibatch
fin = open(mnist_filename, 'rb')
tr, va, te = cPickle.load(fin)
fin.close()
training_set = np.vstack((tr[0], va[0]))
training_label = np.hstack((tr[1], va[1]))
test_set, test_label = te[0], te[1]
training_size = training_set.shape[0]
test_size = test_set.shape[0]
# Convert label data type into int32
training_label = training_label.astype(np.int32)
test_label = test_label.astype(np.int32)
# Check

def testCNNwithFineTuning(self):
    '''
    Test the performance of CNN with fine-tuning the word-embedding.
    '''
    pprint('CNN with fine-tuning experiment')
    conf_filename = './sentiment_cnn.conf'
    # Build the architecture of CNN
    start_time = time.time()
    configer = CNNConfiger(conf_filename)
    convnet = ConvNet(configer, verbose=True)
    end_time = time.time()
    pprint('Time used to build the architecture of CNN: %f seconds' % (end_time - start_time))
    # Training
    learn_rate = 1
    batch_size = configer.batch_size
    num_batches = self.train_size / batch_size
    start_time = time.time()
    # Define function to do the fine-tuning
    grad_to_input = T.grad(convnet.cost, convnet.input)
    compute_grad_to_input = theano.function(inputs=[convnet.input, convnet.truth],
                                            outputs=grad_to_input)
    # Begin training and fine-tuning the word-embedding matrix
    for i in xrange(configer.nepoch):
        right_count = 0
        tot_cost = 0
        # rate = learn_rate
        rate = learn_rate / (i / 100 + 1)
        for j in xrange(num_batches):
            # Record the information of each minibatch
            minibatch_len = list()
            minibatch_indices = list()
            # Dynamically build the training matrix using the current word-embedding matrix
            minibatch_txt = self.senti_train_txt[j * batch_size:(j + 1) * batch_size]
            minibatch = np.zeros((batch_size, self.word_embedding.embedding_dim()), dtype=floatX)
            for k, txt in enumerate(minibatch_txt):
                words = txt.split()
                words = [word.lower() for word in words]
                vectors = np.asarray([self.word_embedding.wordvec(word) for word in words])
                minibatch[k, :] = np.mean(vectors, axis=0)
                # Record the length of each sentence
                minibatch_len.append(len(words))
                # Record the index of each word in each sentence
                minibatch_indices.append([self.word_embedding.word2index(word) for word in words])
            # Reshape into the form of input to CNN
            minibatch = minibatch.reshape((batch_size, 1, configer.image_row, configer.image_col))
            label = self.senti_train_label[j * batch_size:(j + 1) * batch_size]
            # Training
            cost, accuracy = convnet.train(minibatch, label, rate)
            prediction = convnet.predict(minibatch)
            right_count += np.sum(label == prediction)
            tot_cost += cost
            # Fine-tuning for word-vector matrix
            grad_minibatch = compute_grad_to_input(minibatch, label)
            grad_minibatch = grad_minibatch.reshape((batch_size, self.word_embedding.embedding_dim()))
            # Updating the word2vec matrix
            minibatch_len = np.asarray(minibatch_len)
            grad_minibatch /= minibatch_len[:, np.newaxis]
            for k, indices in enumerate(minibatch_indices):
                for l in indices:
                    self.word_embedding._embedding[l, :] -= 0.01 * rate * grad_minibatch[k, :]
        accuracy = right_count / float(self.train_size)
        pprint('Epoch %d, total cost: %f, overall accuracy: %f' % (i, tot_cost, accuracy))
        if (i + 1) % 100 == 0:
            ConvNet.save('./sentiment.cnn', convnet)
    end_time = time.time()
    pprint('Time used to train CNN on Sentiment analysis task: %f minutes.' %
           ((end_time - start_time) / 60))
    # Test
    num_batches = self.test_size / batch_size
    right_count = 0
    for i in xrange(num_batches):
        minibatch_txt = self.senti_test_txt[i * batch_size:(i + 1) * batch_size]
        minibatch = np.zeros((batch_size, self.word_embedding.embedding_dim()), dtype=floatX)
        for j, txt in enumerate(minibatch_txt):
            words = txt.split()
            words = [word.lower() for word in words]
            vectors = np.asarray([self.word_embedding.wordvec(word) for word in words])
            minibatch[j, :] = np.mean(vectors, axis=0)
        # Reshape into the form of input to CNN
        minibatch = minibatch.reshape((batch_size, 1, configer.image_row, configer.image_col))
        label = self.senti_test_label[i * batch_size:(i + 1) * batch_size]
        prediction = convnet.predict(minibatch)
        right_count += np.sum(prediction == label)
    test_accuracy = right_count / float(self.test_size)
    pprint('Test set accuracy: %f' % test_accuracy)

# Set Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Prepare Test Data
test_data = torchvision.datasets.MNIST(root='./datasets',
                                       train=False,
                                       transform=transforms.ToTensor())

# Define Test Dataloader
test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                          batch_size=100,
                                          shuffle=True)

# Load Model and Trained Parameters
model_test = ConvNet(10).to(device)
model_test.load_state_dict(torch.load('./model.pth', map_location=device))
model_test.eval()

# Evaluate
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model_test(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

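# Not part of the original snippet: a minimal sketch of reporting the result,
# assuming `correct` and `total` were accumulated by the evaluation loop above.
print('Test accuracy on {} MNIST images: {:.2f} %'.format(total, 100.0 * correct / total))
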
# print(role_label2id, role_id2label)
trigger_label2id, trigger_id2label = {}, {}
with open('./dict/vocab_trigger_label_map.txt', 'r', encoding='utf-8') as f:
    for line in f.readlines():
        label, iid = line.split()
        trigger_label2id[label] = iid
        trigger_id2label[iid] = label
# print(trigger_label2id, trigger_id2label)

batch_size = args.batch_size
device = torch.device(args.cuda)
tokenizer = BertTokenizer.from_pretrained('../roberta/vocab.txt', do_lower_case=False)
# bert_embeddings = np.load("bert_embeddings.npy")
model = Model(1, batch_size, device)

parameters_to_optimize = list(model.named_parameters())
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
parameters_to_optimize = [{
    'params': [
        p for n, p in parameters_to_optimize
        if not any(nd in n for nd in no_decay) and 'bert' in n
    ],
    'weight_decay': 0.01
}, {
    'params': [
        p for n, p in parameters_to_optimize
        if any(nd in n for nd in no_decay) and 'bert' in n
    ],

    [torch.tensor(i, dtype=torch.long) for i in emnist_loader.test_labels])
test_data = torch.stack([i.unsqueeze(0) for i in test_data])
test_data = test_data.type(torch.cuda.FloatTensor)

# Create your dataset and dataloader
test_dataset = utils.TensorDataset(test_data, test_labels)
test_loader = utils.DataLoader(test_dataset, shuffle=True)

model_file = Path(os.path.join(SAVED_NETWORK_PATH, network_name))
if model_file.is_file():
    model = torch.load(model_file)
    model.eval()
else:
    # Create your network
    model = ConvNet(num_classes).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Train the model
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (t_i, t_l) in enumerate(train_loader):
            t_i = t_i.to(device=device)
            t_l = t_l.to(device=device)
            optimizer.zero_grad()
            # Forward pass

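            # The original snippet is truncated at the line above. A minimal sketch
            # of the usual continuation of this loop, assuming `model`, `criterion`
            # and `optimizer` are the objects created above (not original code):
            outputs = model(t_i)
            loss = criterion(outputs, t_l)
            loss.backward()
            optimizer.step()
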
def testBuilding(self):
    # Try to build CNN without actually running the algorithm
    convNet = ConvNet(self.configer, verbose=True)

import matplotlib.pyplot as plt

from cnn import ConvNet
from bin.mnist import load_mnist
from bin.trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)
x_train, t_train = x_train[:10000], t_train[:10000]
x_test, t_test = x_test[:2000], t_test[:2000]

max_epochs = 20

network = ConvNet(input_dim=(1, 28, 28),
                  conv_param={
                      'filter_num': 30,
                      'filter_size': 5,
                      'pad': 0,
                      'stride': 1
                  },
                  hidden_size=100,
                  output_size=10,
                  weight_init_std=0.01)

trainer = Trainer(network,
                  x_train,
                  t_train,
                  x_test,
                  t_test,
                  epochs=max_epochs,
                  mini_batch_size=100,
                  optimizer='Adam',
                  optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()

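# matplotlib is imported above but never used in this snippet. A hedged sketch of
# the usual follow-up plot, assuming the local Trainer class records per-epoch
# accuracies in `train_acc_list` / `test_acc_list` (an assumption, not verified here).
plt.plot(trainer.train_acc_list, marker='o', label='train')
plt.plot(trainer.test_acc_list, marker='s', label='test')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='lower right')
plt.show()
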
### Load Data
train_dir = '~/datasets/font/npy_train'.replace('~', os.path.expanduser('~'))
train_data = FontDataset(train_dir)
test_dir = '~/datasets/font/npy_test'.replace('~', os.path.expanduser('~'))
test_data = FontDataset(test_dir)

### Define Dataloader
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batch_size)

### Define Model and Load Params
model = ConvNet().to(device)
print("========================== Original Model =============================", "\n", model)
model.load_state_dict(torch.load('./pths/cifar10_pre_model.pth', map_location=device))

### Use the pre-trained model and only replace the last layer
for param in model.parameters():
    param.requires_grad = False
model.fc2 = nn.Linear(120, 50)  # only this new layer keeps requires_grad=True
model = model.to(device)
print("========================== Modified Model =============================", "\n", model)

### Define Loss and Optim
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

def my_cnn(data, data_test):
    # 3 layers, no dropout
    print('3 layer CNN with softmax loss:')
    np.random.seed(598)
    model_cnn = ConvNet(num_filters=32,
                        filter_size=5,
                        hidden_dim=512,
                        num_classes=10,
                        reg=0.0,
                        dropout=0,
                        normalization=True)
    net_cnn = Solver(model=model_cnn,
                     data=data,
                     update_rule='adam',
                     optim_config={'learning_rate': 0.01},
                     lr_decay=1.0,
                     batch_size=50,
                     num_epochs=15,
                     verbose=True,
                     print_every=1)
    print(' training...')
    net_cnn.train()
    print(' best val acc : %f ' % net_cnn.best_val_acc)
    print(' test acc : %f\n ' % net_cnn.check_accuracy(data_test['X'], data_test['y'], num_samples=1000))

    # 3 layers, dropout 0.3
    print('3 layer CNN with softmax loss:')
    np.random.seed(598)
    model_cnn = ConvNet(num_filters=32,
                        filter_size=5,
                        hidden_dim=512,
                        num_classes=10,
                        reg=0.0,
                        dropout=0.3,
                        normalization=True)
    net_cnn = Solver(model=model_cnn,
                     data=data,
                     update_rule='adam',
                     optim_config={'learning_rate': 0.01},
                     lr_decay=1.0,
                     batch_size=50,
                     num_epochs=10,
                     verbose=True,
                     print_every=1)
    print(' training...')
    net_cnn.train()
    print(' best val acc : %f ' % net_cnn.best_val_acc)
    print(' test acc : %f\n ' % net_cnn.check_accuracy(data_test['X'], data_test['y'], num_samples=1000))

import numpy as np
from keras.datasets import mnist

from cnn import ConvNet

print("Loading MNIST")
(x_train, y_train), (x_test, y_test) = mnist.load_data()
train_images = x_train[:1500]
train_labels = y_train[:1500]
test_images = x_test[:1500]
test_labels = y_test[:1500]

input_shape = train_images[0].shape
convnet = ConvNet(filter_size=3,
                  num_filters=9,
                  pool_size=2,
                  input_shape=input_shape,
                  out_dim=10)
print("CNN layers created")


def preprocess_image(image):
    return (image / 255) - 0.5


def forward_cnn(image, label):
    out = convnet.forward(preprocess_image(image))
    cross_ent_loss = -np.log(out[label])
    accuracy = 1 if np.argmax(out) == label else 0
    return out, cross_ent_loss, accuracy


def train(image, label, learning_rate=0.005):
    out, loss, acc = forward_cnn(image, label)
    # Gradient of the cross-entropy loss w.r.t. the softmax output probabilities
    gradient = np.zeros(10)
    gradient[label] = -1 / out[label]

np.save('mnist_data.npy', X)
np.save('mnist_labels.npy', y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1)

X_train = X_train.reshape(-1, 1, 28, 28)
X_val = X_val.reshape(-1, 1, 28, 28)
X_test = X_test.reshape(-1, 1, 28, 28)
# N, D = X_train.shape
print(X_train.shape)

data = {
    'X_train': X_train,
    'y_train': y_train,
    'X_val': X_val,
    'y_val': y_val,
    'X_test': X_test,
    'y_test': y_test
}

model = ConvNet()
solver = Solver(model,
                data,
                update_rule='sgd',
                optim_config={
                    'learning_rate': 2e-3,
                },
                lr_decay=1,
                num_epochs=1,
                batch_size=50,
                print_every=2)
solver.train()

acc = solver.check_accuracy(X=X_test, y=y_test)
print(acc)

                        num_workers=4),
    'val': DataLoader(data['val'],
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=4)
}

#############
### MODEL ###
#############
from torch import nn, optim

from cnn import ConvNet

num_epochs = 10
learning_rate = 0.001

net = ConvNet()
net.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate, betas=(0.5, 0.999))

################
### TRAINING ###
################
import matplotlib.pyplot as plt

n_batches = len(loader['train'])
for epoch in range(num_epochs):
    for i, sample in enumerate(loader['train']):