def visualize():
    """Render the per-glimpse attention masks of a trained ArcBinaryClassifier
    for a single sample pair obtained via get_sample()."""
    # restore the trained model from disk
    discriminator = ArcBinaryClassifier(num_glimpses=opt.numGlimpses,
                                        glimpse_h=opt.glimpseSize,
                                        glimpse_w=opt.glimpseSize,
                                        controller_out=opt.numStates)
    checkpoint_path = os.path.join("saved_models", opt.name, opt.load)
    discriminator.load_state_dict(torch.load(checkpoint_path))
    arc = discriminator.arc

    sample = get_sample(discriminator)

    # controller hidden state after every glimpse: (2*numGlimpses, controller_out)
    all_hidden = arc._forward(sample[None, :, :])[:, 0, :]
    glimpse_params = torch.tanh(arc.glimpser(all_hidden))
    masks = arc.glimpse_window.get_attention_mask(glimpse_params,
                                                  mask_h=opt.imageSize,
                                                  mask_w=opt.imageSize)

    # Glimpses alternate between the two images, and the hidden state emitted
    # after looking at one image parameterises the glimpse on the other — so
    # the odd-indexed masks belong to the first image and the even-indexed
    # masks to the second.
    masks1 = [mask for idx, mask in enumerate(masks) if idx % 2 == 1]
    masks2 = [mask for idx, mask in enumerate(masks) if idx % 2 == 0]

    for pair_idx, (mask1, mask2) in enumerate(zip(masks1, masks2)):
        display(sample[0], mask1, sample[1], mask2, "img_{}".format(pair_idx))
def visualize():
    """Visualize the attention glimpses of a trained ArcBinaryClassifier on a test batch.

    Loads the checkpoint named by ``opt.load``, runs the discriminator on one
    test batch, and for every sample renders the per-glimpse attention masks
    of both images (plus the prediction and target) via ``display()``.
    """
    # initialise the model and restore its weights from disk.
    discriminator = ArcBinaryClassifier(num_glimpses=opt.numGlimpses,
                                        glimpse_h=opt.glimpseSize,
                                        glimpse_w=opt.glimpseSize,
                                        controller_out=opt.numStates)
    discriminator.load_state_dict(
        torch.load(os.path.join("saved_models", opt.name, opt.load)))
    arc = discriminator.arc

    # load the dataset in memory and fetch one test batch.
    loader = Batcher(batch_size=opt.batchSize, image_size=opt.imageSize)
    X, Y = loader.fetch_batch("test")
    pred = discriminator(X)
    # NOTE(review): the original also built a BCELoss (moving it to CUDA when
    # opt.cuda) and computed the batch loss here, but never used either value;
    # that dead computation has been removed.

    for sample_num, sample in enumerate(X):
        # controller hidden state after every glimpse:
        # (2*numGlimpses, controller_out)
        all_hidden = arc._forward(sample[None, :, :])[:, 0, :]
        glimpse_params = torch.tanh(arc.glimpser(all_hidden))
        masks = arc.glimpse_window.get_attention_mask(
            glimpse_params, mask_h=opt.imageSize, mask_w=opt.imageSize)

        sample_pred = pred[sample_num].item()
        sample_target = Y[sample_num].item()

        # separate the masks of each image: glimpses alternate between the
        # two images, and the hidden state produced after looking at one
        # image parameterises the glimpse on the other.
        masks1 = []
        masks2 = []
        for i, mask in enumerate(masks):
            if i % 2 == 1:  # the first image outputs the hidden state for the next image
                masks1.append(mask)
            else:
                masks2.append(mask)

        for i, (mask1, mask2) in enumerate(zip(masks1, masks2)):
            display(sample[0], mask1, sample[1], mask2, sample_pred,
                    sample_target, "sample_{}_img_{}".format(sample_num, i))
def train():
    """Train an ArcBinaryClassifier, validating every 10 iterations and
    checkpointing whenever the validation loss improves significantly or
    10 minutes have passed since the last save.

    Runs forever (``while True``); stop it externally. Relies on the
    module-level ``parser``, ``batcher`` and ``models`` objects.
    """
    opt = parser.parse_args()

    if opt.cuda:
        batcher.use_cuda = True
        models.use_cuda = True

    if opt.name is None:
        # if no name is given, we generate a name from the parameters.
        # only those parameters are taken, which if changed break torch.load
        # compatibility.
        opt.name = "{}_{}_{}_{}".format(opt.numGlimpses, opt.glimpseSize,
                                        opt.numStates,
                                        "cuda" if opt.cuda else "cpu")

    print("Will start training {} with parameters:\n{}\n\n".format(
        opt.name, opt))

    # make directory for storing models.
    models_path = os.path.join("saved_models", opt.name)
    os.makedirs(models_path, exist_ok=True)

    # initialise the model
    discriminator = ArcBinaryClassifier(num_glimpses=opt.numGlimpses,
                                        glimpse_h=opt.glimpseSize,
                                        glimpse_w=opt.glimpseSize,
                                        controller_out=opt.numStates)

    if opt.cuda:
        discriminator.cuda()

    # load from a previous checkpoint, if specified.
    if opt.load is not None:
        discriminator.load_state_dict(
            torch.load(os.path.join(models_path, opt.load)))

    # set up the optimizer.
    bce = torch.nn.BCELoss()
    if opt.cuda:
        bce = bce.cuda()
    optimizer = torch.optim.Adam(params=discriminator.parameters(),
                                 lr=opt.lr)

    # load the dataset in memory.
    loader = Batcher(batch_size=opt.batchSize, image_size=opt.imageSize)

    # ready to train ...
    best_validation_loss = None
    # a checkpoint is saved only when the validation loss beats the previous
    # best by at least this multiplicative margin (2%).
    saving_threshold = 1.02
    last_saved = datetime.utcnow()
    save_every = timedelta(minutes=10)

    i = -1
    while True:  # infinite training loop; interrupt externally to stop.
        i += 1
        X, Y = loader.fetch_batch("train")

        discriminator.train()  # set to train mode
        pred = discriminator(X)
        loss = bce(pred, Y.float())

        # note that this only validating every 10 steps of training, need to
        # fix this later
        # need to set in eval mode and turn off gradients in eval mode
        with torch.no_grad():
            if i % 10 == 0:
                # validate your model
                X_val, Y_val = loader.fetch_batch("val")
                discriminator.eval()  # set to evaluation mode
                pred_val = discriminator(X_val)
                loss_val = bce(pred_val, Y_val.float())

                training_loss = loss.item()
                validation_loss = loss_val.item()

                print(
                    "Iteration: {} \t Train: Acc={}%, Loss={} \t\t Validation: Acc={}%, Loss={}"
                    .format(i, get_pct_accuracy(pred, Y), training_loss,
                            get_pct_accuracy(pred_val, Y_val),
                            validation_loss))

                # first validation step just seeds the best-loss tracker.
                if best_validation_loss is None:
                    best_validation_loss = validation_loss

                if best_validation_loss > (saving_threshold *
                                           validation_loss):
                    print(
                        "Significantly improved validation loss from {} --> {}. Saving..."
                        .format(best_validation_loss, validation_loss))
                    # checkpoint filename is the validation loss itself.
                    discriminator.save_to_file(
                        os.path.join(models_path, str(validation_loss)))
                    best_validation_loss = validation_loss
                    last_saved = datetime.utcnow()

                # periodic safety save even without an improvement.
                if last_saved + save_every < datetime.utcnow():
                    print(
                        "It's been too long since we last saved the model. Saving..."
                    )
                    discriminator.save_to_file(
                        os.path.join(models_path, str(validation_loss)))
                    last_saved = datetime.utcnow()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
opt = parser.parse_args()

if opt.name is None:
    # if no name is given, we generate a name from the parameters.
    # only those parameters are taken, which if changed break torch.load
    # compatibility.
    opt.name = "{}_{}_{}_{}".format(opt.numGlimpses, opt.glimpseSize,
                                    opt.numStates,
                                    "cuda" if opt.cuda else "cpu")

# initialise the batcher
batcher = Batcher(batch_size=opt.batchSize)

if __name__ == "__main__":
    # rebuild the trained model and restore its weights from disk.
    discriminator = ArcBinaryClassifier(num_glimpses=opt.numGlimpses,
                                        glimpse_h=opt.glimpseSize,
                                        glimpse_w=opt.glimpseSize,
                                        controller_out=opt.numStates)
    discriminator.load_state_dict(
        torch.load(os.path.join("saved_models", opt.name, opt.load)))

    # holds results of classification of test dataset
    results = []

    # get first test image classification, to get first index variable value
    X, true_label, size, index = batcher.fetch_test_batch()
    pred = discriminator(X).data.numpy()

    # get position of train image that is the most similar to 'index-th' test
    # image. Divide by 100 because first 100 images are 0-th class, second 100
    # images are 1-th class and so on. Compared to true label (0 - 9)
    # interval. To use with unequal classes, use array with indexes where each
    # class starts.
    if pred.argmax() // 100 == true_label:
        # correctly classified
        results.append(1)
    else:
        # NOTE(review): the visible source is truncated at this point — the
        # else-branch body (presumably results.append(0) and the remainder of
        # the evaluation loop) is missing from this chunk. Confirm against the
        # full file before editing.
# NOTE(review): this chunk starts mid-scope — `name`, `numGlimpses`,
# `glimpseSize`, `numStates`, `cuda`, `load` and `lr` are bound outside the
# visible source (presumably parameters of an enclosing training function).
# TODO: confirm against the full file.
if name is None:
    # if no name is given, we generate a name from the parameters.
    # only those parameters are taken, which if changed break torch.load
    # compatibility.
    name = "{}_{}_{}_{}".format(numGlimpses, glimpseSize, numStates,
                                "cuda" if cuda else "cpu")

print("Will start training {} \n".format(name))

# make directory for storing models.
models_path = os.path.join("saved_models", name)
os.makedirs(models_path, exist_ok=True)

# initialise the model
discriminator = ArcBinaryClassifier(num_glimpses=numGlimpses,
                                    glimpse_h=glimpseSize,
                                    glimpse_w=glimpseSize,
                                    controller_out=numStates)

if cuda:
    discriminator.cuda()

# load from a previous checkpoint, if specified.
if load is not None:
    discriminator.load_state_dict(torch.load(os.path.join(models_path, load)))

# set up the optimizer.
# NOTE(review): `size_average` is deprecated in modern PyTorch in favour of
# reduction="sum"; left unchanged here to preserve behaviour on the version
# this code targets.
mse = torch.nn.MSELoss(size_average=False)
if cuda:
    mse = mse.cuda()

optimizer = torch.optim.Adam(params=discriminator.parameters(), lr=lr)
def test(epochs=1):
    """Evaluate a trained ArcBinaryClassifier on the test split.

    Loads the checkpoint named by ``opt.load``, iterates over the full test
    set for ``epochs`` passes, logs loss/accuracy every 100 batches, and
    prints a per-epoch summary at the end.

    Args:
        epochs: number of passes over the test set (default 1).
    """
    # loss used for reporting only (no backward pass is taken).
    bce = torch.nn.BCELoss()

    # initialise the model
    discriminator = ArcBinaryClassifier(num_glimpses=opt.numGlimpses,
                                        glimpse_h=opt.glimpseSize,
                                        glimpse_w=opt.glimpseSize,
                                        controller_out=opt.numStates)

    if opt.cuda:
        bce = bce.cuda()
        discriminator.cuda()

    discriminator.load_state_dict(
        torch.load(os.path.join("saved_models", opt.name, opt.load)))
    # NOTE(review): the original bound `arc = discriminator.arc` here but
    # never used it; the unused local has been removed.

    # load the dataset in memory.
    loader = Batcher(batch_size=opt.batchSize, image_size=opt.imageSize)

    # retrieve total number of samples
    num_unique_chars = loader.data.shape[0]
    num_samples_per_char = loader.data.shape[1]
    total_num_samples = num_unique_chars * num_samples_per_char

    # calc number of steps per epoch; floor division drops any trailing
    # partial batch (same behaviour as the original int(a / b)).
    num_batches_per_epoch = total_num_samples // opt.batchSize
    print('num_batches_per_epoch', num_batches_per_epoch)

    # loop thru epochs
    all_epoch_losses = []  # average loss per epoch, over whole batches
    all_epoch_acc = []     # average accuracy per epoch

    # used for precision recall later
    preds = []
    labels = []

    discriminator.eval()  # set in eval mode
    # turn off gradients
    with torch.no_grad():
        for epoch in range(epochs):
            running_epoch_loss = 0
            running_epoch_acc = 0

            # loop through num of batches in an epoch
            for batch_num in range(num_batches_per_epoch):
                X, Y = loader.fetch_batch(
                    "test")  # loader loads data to cuda if available
                pred = discriminator(X)
                batch_loss = bce(pred, Y.float())
                running_epoch_loss += batch_loss.item()  # sum all the loss

                batch_acc = get_pct_accuracy(pred, Y)
                running_epoch_acc += batch_acc

                if batch_num % 100 == 0:
                    # BUGFIX: log the scalar loss value instead of the tensor
                    # repr that the original printed.
                    print("Batch: {} \t Test: Acc={}%, Loss={}:".format(
                        batch_num, batch_acc, batch_loss.item()))

            # append the average loss over the epoch (for a whole batch)
            all_epoch_losses.append(running_epoch_loss /
                                    num_batches_per_epoch)
            all_epoch_acc.append(running_epoch_acc / num_batches_per_epoch)

    # loop through losses and display per epoch
    for i in range(len(all_epoch_losses)):
        print('Epoch {} - Loss: {}, Accuracy: {}'.format(
            i, all_epoch_losses[i], all_epoch_acc[i]))