def main():
    """Train the CNN on the two-label data set and print test metrics.

    Data comes from ``U.get_data``; the last tenth of the training examples
    becomes the dev split, the remainder is shuffled, and all three splits
    are batched before training. ``y_*`` holds two label sequences
    (index 0 and index 1), which are sliced and shuffled in lockstep.
    """
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir, use_mini_dataset)

    # Hold out the final 10% of the training examples as the dev split.
    cut = int(9 * len(X_train) / 10)
    X_dev, X_train = X_train[cut:], X_train[:cut]
    y_dev = [y_train[k][cut:] for k in (0, 1)]
    y_train = [y_train[k][:cut] for k in (0, 1)]

    # Shuffle the training part only; both label sequences share the order.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [[y_train[k][j] for j in order] for k in (0, 1)]

    # Batch every split with the module-level batch size.
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    input_dimension = img_rows * img_cols
    model = CNN(input_dimension)

    train_model(train_batches, dev_batches, model)

    # Final evaluation; passing None as the optimizer argument as the
    # original did (presumably "no parameter updates" - confirm in run_epoch).
    loss, acc = run_epoch(test_batches, model.eval(), None)
    report = 'Test loss1: {:.6f} accuracy1: {:.6f} loss2: {:.6f} accuracy2: {:.6f}'.format(
        loss[0], acc[0], loss[1], acc[1])
    print(report)
def model_0():
    """Build the simple RNN with 161 input features and train it.

    Training runs with ``spectrogram=True``; history goes to
    ``model_0.pickle`` and weights to ``model_0.h5``.
    """
    network = simple_rnn_model(input_dim=161)
    train_model(
        input_to_softmax=network,
        pickle_path='model_0.pickle',
        save_model_path='model_0.h5',
        spectrogram=True,
    )
def main(input_dim, train_desc_file, valid_desc_file, pickle_path, save_model_path, epochs=20):
    """Build the ``mmodel1`` network and train it.

    Args:
        input_dim: feature dimension handed to the model builder.
        train_desc_file: forwarded to ``train_model`` as ``train_json``.
        valid_desc_file: forwarded to ``train_model`` as ``valid_json``.
        pickle_path: forwarded to ``train_model`` unchanged.
        save_model_path: forwarded to ``train_model`` unchanged.
        epochs: number of training epochs (default 20).
    """
    # Alternative configurations the original kept commented out:
    #   mmodel(input_dim=input_dim, filters=1280, kernel_size=11, conv_stride=2,
    #          conv_border_mode='same', units=800, output_dim=95)
    #   mmodel2(input_dim=input_dim, filters=640, kernel_sizes=[5, 5],
    #           strides=[1, 1], units_birnn=1280, units_fc=800,
    #           conv_border_mode='same', output_dim=95)
    network_options = dict(
        input_dim=input_dim,
        filters=512,
        kernel_size=5,
        conv_stride=1,
        conv_border_mode='same',
        units=1024,
        output_dim=95,
    )
    network = mmodel1(**network_options)

    train_model(
        input_to_softmax=network,
        train_json=train_desc_file,
        valid_json=valid_desc_file,
        pickle_path=pickle_path,
        save_model_path=save_model_path,
        epochs=epochs,
    )
def main():
    """Train a classifier on top of a pretrained backbone and save it.

    Command-line options come from ``train_utils.get_cmd_args``. The
    requested architecture is validated before any data is loaded or any
    pretrained weights are instantiated; previously an unknown name printed
    a message and then crashed with ``UnboundLocalError`` on ``arch``.

    Raises:
        SystemExit: with status 1 when ``in_arg.arch`` is not supported.
    """
    in_arg = train_utils.get_cmd_args()

    # Architecture name -> (constructor, classifier input size).
    # Constructors are stored uncalled so only the selected one is built.
    supported = {
        "vgg16": (train_utils.models.vgg16, 25088),
        "densenet": (train_utils.models.densenet161, 2208),
        "alexnet": (train_utils.models.alexnet, 9216),
    }
    if in_arg.arch not in supported:
        print("model not available!")
        raise SystemExit(1)

    dataloaders, class_to_idx = train_utils.transform_data(in_arg.data_dir)

    # "512,256" -> [512, 256]
    hidden_size = [int(x) for x in in_arg.hidden_units.split(',')]

    build_backbone, input_size = supported[in_arg.arch]
    arch = {
        "model": build_backbone(pretrained=True),
        "input_size": input_size,
        "name": in_arg.arch,
    }

    print("Create Model: {} Classifier: {},{},{} Learnrate: {}".format(
        arch["name"], arch["input_size"], hidden_size, len(class_to_idx),
        in_arg.learning_rate))
    model, criterion, optimizer = train_utils.create_model(
        arch["model"], class_to_idx,
        input_size=arch["input_size"],
        hidden_size=hidden_size,
        output_size=len(class_to_idx),
        lr=in_arg.learning_rate)

    print("Begin Training.. Epochs: {} @{}".format(
        in_arg.epochs, "GPU" if in_arg.gpu else "CPU"))
    train_utils.train_model(model, dataloaders["train"], dataloaders["valid"],
                            optimizer, criterion,
                            epochs=in_arg.epochs, gpu=in_arg.gpu)

    # Only the first path component of save_dir is created, as before.
    # NOTE(review): nested or absolute save_dir values are not fully handled
    # by this; left unchanged because save_model's path contract is not
    # visible here.
    save_root = in_arg.save_dir.split('/')[0] + "/"
    if not os.path.exists(save_root):
        os.makedirs(save_root)
    train_utils.save_model(arch["name"], model, optimizer, criterion,
                           path=in_arg.save_dir)
def main():
    """Train the CRNN denoiser and save its ``state_dict``.

    Uses ``cuda:0`` when available, otherwise the CPU. Data directories,
    batch size, epochs, learning rate and the output path all come from
    ``parse_args``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    args = parse_args()

    train_dataset = DenoisingDataset(args.train_dir)
    val_dataset = DenoisingDataset(args.val_dir)
    # Only the training loader is shuffled.
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)

    model = CRNN().to(device)
    train_model(model, train_loader, val_loader, args.epochs,
                args.learning_rate, device, log=True)

    # os.mkdir cannot create intermediate directories, so a nested save path
    # used to fail here, after training had already finished. makedirs
    # creates the whole chain; exist_ok avoids a race with a concurrent run.
    # An empty dirname means "current directory" and needs nothing created.
    model_dir = os.path.dirname(args.model_save_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    torch.save(model.state_dict(), args.model_save_path,
               _use_new_zipfile_serialization=False)
def main():
    """Train the MLP on the two-label data set and print test metrics.

    Prints the module-level ``device``, loads the data via ``U.get_data``,
    keeps the last tenth of the training examples for dev, shuffles the
    rest, batches with ``batch_data`` and trains for ``n_epochs`` epochs.
    """
    print(f"Device: {device}")
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir, use_mini_dataset)

    # Last 10% of the training examples -> dev split.
    cut = int(9 * len(X_train) / 10)
    X_dev, X_train = X_train[cut:], X_train[:cut]
    y_dev = [y_train[k][cut:] for k in (0, 1)]
    y_train = [y_train[k][:cut] for k in (0, 1)]

    # One shared random order for the inputs and both label sequences.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [[y_train[k][j] for j in order] for k in (0, 1)]

    train_batches = batch_data(X_train, y_train, batch_size)
    dev_batches = batch_data(X_dev, y_dev, batch_size)
    test_batches = batch_data(X_test, y_test, batch_size)

    input_dimension = img_rows * img_cols
    model = MLP(input_dimension).to(device)

    train_model(train_batches, dev_batches, model, n_epochs=n_epochs)

    # Evaluate on the held-out test batches.
    loss, acc = run_epoch(test_batches, model.eval(), None)
    print(f'Test loss1: {loss[0]:.6f} accuracy1: {acc[0]:.6f} loss2: {loss[1]:.6f} accuracy2: {acc[1]:.6f}')
def model_4_b():
    """Build the bidirectional RNN (13 inputs, 200 units) and train it.

    Training runs with ``spectrogram=False`` and ``sort_by_duration=True``;
    outputs go to ``model_4_b.pickle`` and ``model_4_b.h5``.
    """
    # The original note says 13 is the value to use for MFCC features.
    network = bidirectional_rnn_model(input_dim=13, units=200)
    training_options = dict(
        pickle_path='model_4_b.pickle',
        save_model_path='model_4_b.h5',
        sort_by_duration=True,
        spectrogram=False,
    )
    train_model(input_to_softmax=network, **training_options)
def main():
    """Train a small CNN on MNIST and print test loss and accuracy.

    Images are reshaped to 1x28x28, the last tenth of the training set is
    held out for dev, and the rest is shuffled with ``torch.randperm``.
    The model is moved to CUDA when a GPU is available.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Flat vectors -> single-channel 28x28 images.
    image_shape = (1, 28, 28)
    X_train = np.reshape(X_train, (X_train.shape[0],) + image_shape)
    X_test = np.reshape(X_test, (X_test.shape[0],) + image_shape)

    # Hold out the last 10% for validation.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle inputs and labels with one shared permutation.
    order = torch.randperm(X_train.shape[0])
    X_train = X_train[order]
    y_train = y_train[order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Two conv/pool stages followed by a two-layer classifier head.
    layers = [
        nn.Conv2d(1, 32, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        nn.Conv2d(32, 64, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        Flatten(),
        nn.Linear(1600, 128),
        nn.Dropout(0.5),
        nn.Linear(128, num_classes),
    ]
    model = nn.Sequential(*layers)

    # Only the model is moved here; the batches are passed on untouched.
    if not torch.cuda.is_available():
        print("----------------- Using the Device: CPU ----------------- ")
    else:
        print("----------------- Using the Device: CUDA -----------------")
        model = model.to('cuda')

    train_model(train_batches, dev_batches, model, nesterov=True)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    summary = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(summary)
def main():
    """Train the two-stage CNN on MNIST and print test loss and accuracy."""
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Restore the 1x28x28 image layout expected by Conv2d.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Last tenth of the training data becomes the dev split.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle the remaining training examples (inputs and labels together).
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [y_train[j] for j in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Spatial size per stage for a 1x28x28 input:
    #   conv 3x3 -> 32x26x26, pool 2x2 -> 32x13x13,
    #   conv 3x3 -> 64x11x11, pool 2x2 -> 64x5x5 = 1600 features.
    feature_extractor = [
        nn.Conv2d(1, 32, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        nn.Conv2d(32, 64, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
    ]
    classifier = [
        Flatten(),
        nn.Linear(1600, 128),
        nn.Dropout(p=0.5),
        nn.Linear(128, num_classes),
    ]
    model = nn.Sequential(*feature_extractor, *classifier)

    train_model(train_batches, dev_batches, model, nesterov=True)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    summary = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(summary)
def main():
    """Train the CNN on MNIST and print test loss and accuracy.

    The ``#pragma: coderesponse`` markers around the model definition are
    kept because external tooling may key on them.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Back to 1x28x28 images.
    n_train, n_test = X_train.shape[0], X_test.shape[0]
    X_train = np.reshape(X_train, (n_train, 1, 28, 28))
    X_test = np.reshape(X_test, (n_test, 1, 28, 28))

    # Train/dev split: first 90% train, last 10% dev.
    boundary = int(9 * len(X_train) / 10)
    X_dev = X_train[boundary:]
    y_dev = y_train[boundary:]
    X_train = X_train[:boundary]
    y_train = y_train[:boundary]

    # Random order shared by inputs and labels.
    shuffled = np.arange(len(X_train))
    np.random.shuffle(shuffled)
    X_train = [X_train[idx] for idx in shuffled]
    y_train = [y_train[idx] for idx in shuffled]

    batch_size = 32
    train_batches, dev_batches, test_batches = (
        batchify_data(X_train, y_train, batch_size),
        batchify_data(X_dev, y_dev, batch_size),
        batchify_data(X_test, y_test, batch_size),
    )

    #pragma: coderesponse template name="pytorchcnn" dedent="true"
    model = nn.Sequential(
        nn.Conv2d(1, 32, (3, 3)),   # 28x28 -> 26x26, 32 channels
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),       # 26x26 -> 13x13
        nn.Conv2d(32, 64, (3, 3)),  # 13x13 -> 11x11, 64 channels
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),       # 11x11 -> 5x5, i.e. 5*5*64 = 1600 values
        Flatten(),
        nn.Linear(1600, 128),
        nn.Dropout(p=0.5),
        nn.Linear(128, num_classes),
    )
    #pragma: coderesponse end

    train_model(train_batches, dev_batches, model, nesterov=True)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    message = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(message)
def model_3():
    """Build the deep RNN (161 inputs, 200 units, 3 recurrent layers) and train it.

    Training runs with ``spectrogram=True``; outputs go to
    ``model_3.pickle`` and ``model_3.h5``.
    """
    # The original note says to use 13 instead for MFCC features.
    network = deep_rnn_model(input_dim=161, units=200, recur_layers=3)
    train_model(
        input_to_softmax=network,
        pickle_path='model_3.pickle',
        save_model_path='model_3.h5',
        spectrogram=True,
    )
def model_1():
    """Build the RNN (13 inputs, 200 units, ReLU activation) and train it.

    Training runs with ``spectrogram=False``; outputs go to
    ``model_1.pickle`` and ``model_1.h5``.
    """
    # The original note says 13 is the value to use for MFCC features.
    network = rnn_model(input_dim=13, units=200, activation='relu')
    train_model(
        input_to_softmax=network,
        pickle_path='model_1.pickle',
        save_model_path='model_1.h5',
        spectrogram=False,
    )
def main():
    """Train the CNN on MNIST (default dropout) and print test metrics."""
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Reshape flat inputs into single-channel 28x28 images.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Keep the final tenth of the training set aside as dev data.
    split_at = int(9 * len(X_train) / 10)
    X_dev, X_train = X_train[split_at:], X_train[:split_at]
    y_dev, y_train = y_train[split_at:], y_train[:split_at]

    # Shuffle what is left of the training set.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [y_train[j] for j in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Unpadded 3x3 convolutions shrink each side by 2; 2x2 pooling halves it:
    # 28 -> 26 -> 13 -> 11 -> 5, so Flatten yields 5*5*64 = 1600 features.
    stack = []
    stack.append(nn.Conv2d(1, 32, (3, 3)))
    stack.append(nn.ReLU())
    stack.append(nn.MaxPool2d((2, 2)))
    stack.append(nn.Conv2d(32, 64, (3, 3)))
    stack.append(nn.ReLU())
    stack.append(nn.MaxPool2d((2, 2)))
    stack.append(Flatten())
    stack.append(nn.Linear(1600, 128))
    stack.append(nn.Dropout())
    stack.append(nn.Linear(128, num_classes))
    model = nn.Sequential(*stack)

    train_model(train_batches, dev_batches, model, nesterov=True)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    result_line = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(result_line)
def test_models():
    """Train one simple RNN for each ``unit`` value in 32, 64 and 128.

    Each run trains with ``spectrogram=True`` and writes its own pickle
    and ``.h5`` file.
    """
    for unit in (32, 64, 128):
        tag = str(unit)
        network = simple_rnn_model(input_dim=161, unit=unit)
        # NOTE(review): the .h5 name has no underscore before the unit value
        # (e.g. 'model_032.h5') while the pickle name does ('model_0_32.pickle').
        # Names are kept exactly as they were.
        train_model(
            input_to_softmax=network,
            pickle_path='model_0_' + tag + '.pickle',
            save_model_path='model_0' + tag + '.h5',
            spectrogram=True,
        )
def model_final():
    """Build the final model and train it with ``spectrogram=False``.

    Outputs go to ``model_final.pickle`` and ``model_final.h5``.
    """
    # The original note says 13 is the input_dim to use for MFCC features.
    architecture = dict(
        input_dim=13,
        filters=200,
        kernel_size=11,
        conv_stride=2,
        conv_border_mode='valid',
        units=200,
    )
    network = final_model(**architecture)
    train_model(
        input_to_softmax=network,
        pickle_path='model_final.pickle',
        save_model_path='model_final.h5',
        spectrogram=False,
    )
def model_2():
    """Build the CNN+RNN model and train it with ``spectrogram=True``.

    Outputs go to ``model_2.pickle`` and ``model_2.h5``.
    """
    # The original note says to use input_dim=13 instead for MFCC features.
    architecture = dict(
        input_dim=161,
        filters=200,
        kernel_size=11,
        conv_stride=1,
        conv_border_mode='valid',
        units=200,
    )
    network = cnn_rnn_model(**architecture)
    train_model(
        input_to_softmax=network,
        pickle_path='model_2.pickle',
        save_model_path='model_2.h5',
        spectrogram=True,
    )
def oldmain(classes, batch, eta, momentum, LeakyReLU):
    """Train a 784-128-10 network on MNIST and return its test accuracy.

    Args:
        classes: accepted for interface compatibility; the output layer is
            fixed at 10 units and this value is not used by the model.
        batch: batch size used for all three splits.
        eta: learning rate, forwarded to ``train_model`` as ``lr``.
        momentum: forwarded to ``train_model``.
        LeakyReLU: truthy selects ``nn.LeakyReLU``, otherwise ``nn.ReLU``.

    Returns:
        The accuracy value reported by ``run_epoch`` on the test batches.
    """
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Last 10% of the training data is the dev split.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle the training examples.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [y_train[j] for j in order]

    train_batches = batchify_data(X_train, y_train, batch)
    dev_batches = batchify_data(X_dev, y_dev, batch)
    test_batches = batchify_data(X_test, y_test, batch)

    # Same architecture either way; only the hidden activation differs.
    hidden_activation = nn.LeakyReLU() if LeakyReLU else nn.ReLU()
    model = nn.Sequential(
        nn.Linear(784, 128),
        hidden_activation,
        nn.Linear(128, 10),
    )

    train_model(train_batches, dev_batches, model, lr=eta, momentum=momentum)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    summary = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(summary)
    return accuracy
def main():
    """Train a 784-10-10 LeakyReLU network on MNIST, printing debug info.

    Diagnostic prints about sizes and sample shapes are emitted at the same
    points as before; test loss and accuracy are printed at the end.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()
    print(len(X_train), 'len(X_train)')
    print(len(X_train[0]), 'len(X_train)[0]')
    print(y_train.shape, 'y_train.shape')

    # Train/dev split at 90%.
    dev_split_index = int(9 * len(X_train) / 10)
    print(dev_split_index, 'dev_split_index')
    X_dev, y_dev = X_train[dev_split_index:], y_train[dev_split_index:]
    X_train, y_train = X_train[:dev_split_index], y_train[:dev_split_index]

    # Shuffle the training examples.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [y_train[j] for j in order]
    print(X_train[1].shape, 'X_train')
    print(y_train[1], 'y_train[1]')

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Single hidden layer of 10 units.
    hidden_units = 10
    model = nn.Sequential(
        nn.Linear(784, hidden_units),
        nn.LeakyReLU(),
        nn.Linear(hidden_units, num_classes),
    )
    hyperparameters = {"lr": 0.1, "momentum": 0}

    train_model(train_batches, dev_batches, model, **hyperparameters)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    summary = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(summary)
def main():
    """Train a classifier from command-line options and optionally save it.

    Prints the chosen settings, builds the loaders and the model, trains
    it, reports accuracy on the test loader and, when ``args.save_dir`` is
    set, writes a checkpoint there.

    The checkpoint step previously read an undefined ``input_args`` and
    ``args.save_directory``; it now uses ``args`` and ``args.save_dir``,
    matching the rest of this function.
    """
    args = ParseCommandLine()

    print("Data directory: ", args.data_directory)

    # The GPU is used only when it was requested and is actually present.
    use_gpu = torch.cuda.is_available() and args.gpu
    if use_gpu:
        print("Training on GPU.")
    else:
        print("Training on CPU.")

    # Echo the architecture and hyperparameters.
    print("Architecture: {}".format(args.arch))
    print("Learning rate: {}".format(args.learning_rate))
    print("Hidden units: {}".format(args.hidden_units))
    print("Epochs: {}".format(args.epochs))

    if args.save_dir:
        print("Checkpoint save directory: {}".format(args.save_dir))

    train_loader, valid_loader, test_loader, class_to_idx = train_utils.load_data(
        args.data_directory)

    # Only the classifier parameters are handed to the optimizer.
    model = train_utils.build_model(args.arch, args.hidden_units)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=args.learning_rate)
    model.class_to_idx = class_to_idx

    train_utils.train_model(model, args.epochs, args.learning_rate, use_gpu,
                            criterion, optimizer, train_loader, valid_loader)

    test_loss, accuracy = train_utils.validate_model(model, criterion, test_loader)
    print("Validation on the test set")
    print(f"Test accuracy: {accuracy:.2f}%")

    # NOTE(review): save_checkpoint is called unqualified, as before;
    # confirm it is imported at module level rather than living in
    # train_utils.
    if args.save_dir:
        save_checkpoint(args.arch, args.learning_rate, args.hidden_units,
                        args.epochs, model, optimizer, args.save_dir)
def run_model(train_batches, dev_batches, test_batches, lr=0.1, momentum=0, act_fun='ReLU', hidden_size=10):
    """Train a one-hidden-layer network and evaluate it on the test batches.

    Args:
        train_batches: batches used for training.
        dev_batches: batches handed to ``train_model`` alongside the
            training batches.
        test_batches: batches used for the final evaluation.
        lr: learning rate forwarded to ``train_model``.
        momentum: momentum forwarded to ``train_model``.
        act_fun: ``'LeakyReLU'`` selects ``nn.LeakyReLU``; any other value
            selects ``nn.ReLU``.
        hidden_size: width of the hidden layer.

    Returns:
        Tuple ``(val_acc, loss, accuracy)``: the value returned by
        ``train_model`` followed by the two values from ``run_epoch``.
    """
    # Fixed seed so repeated runs start from the same initial weights.
    torch.manual_seed(12321)

    activation_cls = nn.LeakyReLU if act_fun == 'LeakyReLU' else nn.ReLU

    layers = [
        nn.Linear(784, hidden_size),
        activation_cls(),
        nn.Linear(hidden_size, 10),
    ]
    model = nn.Sequential(*layers)

    val_acc = train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    return val_acc, loss, accuracy
def test1():
    """Baseline run: 784-10-10 ReLU network, batch 32, lr 0.1, no momentum.

    Reads ``X_train``/``y_train``/``X_dev``/``y_dev``/``X_test``/``y_test``
    from the enclosing scope.

    Returns:
        The value returned by ``train_model``.
    """
    # Variants noted by the original author: batch_size 64, nn.LeakyReLU,
    # lr 0.01, momentum 0.9.
    batch_size = 32
    splits = ((X_train, y_train), (X_dev, y_dev), (X_test, y_test))
    train_batches, dev_batches, test_batches = [
        batchify_data(inputs, labels, batch_size) for inputs, labels in splits
    ]

    model = nn.Sequential(
        nn.Linear(784, 10),
        nn.ReLU(),
        nn.Linear(10, 10),
    )

    val_acc = train_model(train_batches, dev_batches, model, lr=0.1, momentum=0)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    summary = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(summary)
    return val_acc
def main():
    """Train a 784-128-10 ReLU network on MNIST and print the settings and results."""
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # 90/10 train/dev split, taken in order.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle the training part.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [y_train[j] for j in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # D_in: input dimension, H: hidden dimension, D_out: output dimension.
    D_in = 784
    H = 128
    D_out = num_classes
    model = nn.Sequential(
        nn.Linear(D_in, H),
        nn.ReLU(),
        nn.Linear(H, D_out),
    )
    lr = 0.1
    momentum = 0
    # Reported in the summary line only; the activation above is nn.ReLU.
    leaky_relu_active = False

    train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    settings = "Batch size: {}; Learning Rate: {}; Momentum: {}; LeakyReLU: {}; Hidden Dimension: {}".format(
        batch_size, lr, momentum, leaky_relu_active, H)
    print(settings)
    print("Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy))
def main(batch_size=32, lr=0.1, momentum=0, act="ReLU", hsize=10):
    """Train a one-hidden-layer MNIST network with configurable settings.

    Args:
        batch_size: batch size for all three splits.
        lr: learning rate forwarded to ``train_model``.
        momentum: momentum forwarded to ``train_model``.
        act: activation name, resolved through ``get_activation``.
        hsize: width of the hidden layer.
    """
    # Message text is reproduced verbatim.
    header = "batch: %d, learnign rate: %f, momentum: %f, activation: %s" % (batch_size, lr, momentum, act)
    print(header)

    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Last tenth of the training set is held out as dev data.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle the remaining training examples.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [y_train[j] for j in order]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    layers = [
        nn.Linear(784, hsize),
        get_activation(act),
        nn.Linear(hsize, num_classes),
    ]
    model = nn.Sequential(*layers)

    train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    summary = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(summary)
def main():
    """Train a 784-128-10 ReLU network on MNIST with lr 0.1 and momentum 0.9.

    Results recorded by the original author for the variants tried
    (quoted as written, not re-verified):
        batch_size = 64   Acc1 = 0.9314  Acc2 = 0.976478
        nn.LeakyReLU()    Acc1 = 0.9207  Acc2 = 0.978944
        lr = 0.01         Acc1 = 0.9206  Acc2 = 0.955047
        momentum = 0.9    Acc1 = 0.8928  Acc2 = 0.967246
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Train/dev split at the 90% mark.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle inputs and labels with one shared order.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [y_train[j] for j in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    model = nn.Sequential(
        nn.Linear(784, 128),
        nn.ReLU(),
        nn.Linear(128, num_classes),
    )
    optimiser_settings = {"lr": 0.1, "momentum": 0.9}

    train_model(train_batches, dev_batches, model, **optimiser_settings)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    summary = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(summary)
def main():
    """Train a CNN on MNIST and print test loss and accuracy.

    The model specification previously stopped after the first pooling
    layer, so the network emitted 32x13x13 feature maps instead of
    ``num_classes`` scores. It is completed here with the same second
    conv/pool stage and classifier head used by the other CNN entry points
    in this code base.
    NOTE(review): this assumes ``run_epoch`` expects one score per class -
    confirm against its implementation.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Reshape the data back into 1x28x28 images.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Split into train and dev (last 10% is dev).
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    # Shuffle the training examples.
    permutation = np.arange(len(X_train))
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Split dataset into batches.
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Spatial size for a 1x28x28 input: 28 -> 26 -> 13 -> 11 -> 5,
    # so flattening gives 64 * 5 * 5 = 1600 features.
    model = nn.Sequential(
        nn.Conv2d(1, 32, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        nn.Conv2d(32, 64, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        # nn.Flatten keeps the batch dimension and flattens the rest; it is
        # used so this block needs no name beyond the `nn` it already uses.
        nn.Flatten(),
        nn.Linear(1600, 128),
        nn.Dropout(p=0.5),
        nn.Linear(128, num_classes),
    )

    train_model(train_batches, dev_batches, model, nesterov=True)

    # Evaluate the model on test data.
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    print("Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy))
def main(batch_size=32, lr=0.1, momentum=0, hidden_size=10, leakyReLU=False):
    """Train a one-hidden-layer MNIST network and print test metrics.

    Args:
        batch_size: batch size for all three splits.
        lr: learning rate forwarded to ``train_model``.
        momentum: momentum forwarded to ``train_model``.
        hidden_size: width of the hidden layer.
        leakyReLU: truthy selects ``nn.LeakyReLU``; falsy (including
            ``None``) selects ``nn.ReLU``. The flag was previously tested
            with ``== False``, which sent ``None`` down the LeakyReLU
            branch.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Split into train and dev (last 10% is dev).
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    # Shuffle the training examples.
    permutation = np.arange(len(X_train))
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Split dataset into batches.
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Truthiness test instead of comparison with False (PEP 8).
    nonLinearLayer = nn.LeakyReLU() if leakyReLU else nn.ReLU()
    model = nn.Sequential(
        nn.Linear(784, hidden_size),
        nonLinearLayer,
        nn.Linear(hidden_size, num_classes),
    )

    train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    # Evaluate the model on test data.
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    print("Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy))
def main():
    """Train the baseline 784-10-10 ReLU network on MNIST and print test metrics."""
    # ======== Load the dataset ===========
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # ======= Split into train and dev (last 10% is dev) ==========
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # ======= Shuffle the training examples ==========
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [y_train[j] for j in order]

    # ========= Split dataset into batches ============
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # =========== Model specification ============
    hidden_units = 10
    layers = [
        nn.Linear(784, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, num_classes),
    ]
    model = nn.Sequential(*layers)

    train_model(train_batches, dev_batches, model, lr=0.1, momentum=0)

    # =========== Evaluate on the test data ============
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    summary = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(summary)
def main():
    """Train the MLP on the two-label data set and print test metrics.

    Sizes noted by the original author from a debug run (not re-verified):
    batch 'x' is (64, 1, 42, 28) = (batch_size, 1, img_rows, img_cols),
    batch 'y' is (2, 64) = (num_labels, batch_size); 36000 train and 4000
    dev examples; 562 / 62 / 62 train / dev / test batches.
    """
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir, use_mini_dataset)

    # Final 10% of the training examples becomes the dev split.
    cut = int(9 * len(X_train) / 10)
    X_dev, X_train = X_train[cut:], X_train[:cut]
    y_dev = [y_train[k][cut:] for k in (0, 1)]
    y_train = [y_train[k][:cut] for k in (0, 1)]

    # Shuffle the inputs and both label sequences with one shared order.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [[y_train[k][j] for j in order] for k in (0, 1)]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # The MLP takes the flattened image size.
    input_dimension = img_rows * img_cols
    model = MLP(input_dimension)

    train_model(train_batches, dev_batches, model)

    loss, acc = run_epoch(test_batches, model.eval(), None)
    report = 'Test loss1: {:.6f} accuracy1: {:.6f} loss2: {:.6f} accuracy2: {:.6f}'.format(
        loss[0], acc[0], loss[1], acc[1])
    print(report)
def main():
    """Train the CNN on the two-label data set, on the GPU when available.

    The training part is shuffled by indexing with a ``torch.randperm``
    permutation, so inputs and both label sequences are reordered together.
    """
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir, use_mini_dataset)

    # Final 10% of the training examples becomes the dev split.
    cut = int(9 * len(X_train) / 10)
    X_dev, X_train = X_train[cut:], X_train[:cut]
    y_dev = [y_train[k][cut:] for k in (0, 1)]
    y_train = [y_train[k][:cut] for k in (0, 1)]

    # One permutation applied to the inputs and both label sequences.
    order = torch.randperm(len(X_train))
    X_train = X_train[order]
    y_train = [y_train[k][order] for k in (0, 1)]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Computed as in the original, although CNN() is built without it.
    input_dimension = img_rows * img_cols
    model = CNN()

    # Move the model to the module-level device only when CUDA is present.
    if not torch.cuda.is_available():
        print("----------------- Using the Device: CPU -----------------")
    else:
        model = model.to(device)
        print("----------------- Using the Device: GPU -----------------")

    train_model(train_batches, dev_batches, model)

    loss, acc = run_epoch(test_batches, model.eval(), None)
    report = 'Test loss1: {:.6f} accuracy1: {:.6f} loss2: {:.6f} accuracy2: {:.6f}'.format(
        loss[0], acc[0], loss[1], acc[1])
    print(report)
def test_grid(batch_size=32, lr=0.1, momentum=0, LeakyReLU=False):
    """Run one seeded grid-search configuration on MNIST.

    Args:
        batch_size: batch size for all three splits.
        lr: learning rate forwarded to ``train_model``.
        momentum: momentum forwarded to ``train_model``.
        LeakyReLU: truthy trains the LeakyReLU variant, otherwise the ReLU
            variant.

    Returns:
        The value returned by ``train_model``.
    """
    # Fixed seeds for reproducibility (NumPy shuffle and torch).
    np.random.seed(12321)
    torch.manual_seed(12321)

    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Last 10% of the training data is the dev split.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle the training examples.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[j] for j in order]
    y_train = [y_train[j] for j in order]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Both variants are still constructed, ReLU first, exactly as before.
    # Building only the selected one would presumably change the random
    # initial weights of the LeakyReLU variant under the fixed seed.
    candidates = {}
    for use_leaky, activation_cls in ((False, nn.ReLU), (True, nn.LeakyReLU)):
        candidates[use_leaky] = nn.Sequential(
            nn.Linear(784, 128),
            activation_cls(),
            nn.Linear(128, num_classes),
        )
    model = candidates[bool(LeakyReLU)]

    val_acc = train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    summary = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(summary)
    return val_acc