def train(
    self,
    data_path,
    criterion,
    max_epoch=50,
    batch_size=1,
    k_fold=False,
    n_k=5,
    n_discontinue=5,
):
    model = self.model
    model.train()

    dataset = MyDataset(data_path)
    len_dataset = len(dataset)
    train_test_indices = createIndicies(len_dataset, k_fold)

    optimizer = torch.optim.Adam(model.parameters(), weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
    tb_writer = SummaryWriter(log_dir=self.log_dir)

    for train_indices, test_indices in train_test_indices:  # n_k folds
        split = DataSplit(
            dataset, train_indices, test_indices, val_train_split=0.1, device=DEVICE
        )
        train_loader, valid_loader, test_loader = split.get_split(
            batch_size, num_workers=4
        )

        loss_table, loss_list = "", []
        epoch_index = 1
        controller, condition = Controller(max_epoch, n_discontinue), True
        while condition:
            train_loss, valid_loss = trainOneEpoch(
                model,
                criterion,
                train_loader,
                valid_loader,
                optimizer,
                DEVICE,
                epoch_index,
                tb_writer,
            )
            loss_table = displayLoss(loss_table, epoch_index, train_loss, valid_loss)
            loss_list.append([train_loss, valid_loss])
            scheduler.step()

            # Checkpoint the model every EVAL_INTERVAL epochs.
            if epoch_index % EVAL_INTERVAL == 0:
                PATH = "./cifar_net.pth"
                torch.save(model.state_dict(), PATH)

            epoch_index, condition = controller.update(valid_loss)

    print("Finished Training")

    # Plot the train/validation loss curves (of the last fold) and save the figure.
    dat = np.array(loss_list)
    x = np.arange(dat.shape[0])
    plt.plot(x, dat)
    plt.savefig(
        "/n/work1/ooyama/plot_image/learning_curve"
        + str(datetime.datetime.now())
        + ".png"
    )
    tb_writer.close()
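# The loop above depends on a `Controller` helper whose definition is not part
# of this excerpt. A minimal sketch of what it might look like, assuming it
# implements early stopping that halts once the validation loss has failed to
# improve for `n_discontinue` consecutive epochs (the class body below is an
# assumption, not the original implementation):
class Controller:
    def __init__(self, max_epoch, n_discontinue):
        self.max_epoch = max_epoch
        self.n_discontinue = n_discontinue
        self.epoch = 1
        self.best_loss = float("inf")
        self.bad_epochs = 0

    def update(self, valid_loss):
        # Count consecutive epochs without a validation-loss improvement.
        if valid_loss < self.best_loss:
            self.best_loss = valid_loss
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1
        self.epoch += 1
        keep_going = (
            self.epoch <= self.max_epoch and self.bad_epochs < self.n_discontinue
        )
        return self.epoch, keep_going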
import argparse
import random

import numpy as np
import torch

torch.cuda.manual_seed(94305)
np.random.seed(94305)
random.seed(94305)

parser = argparse.ArgumentParser(description="Differentiable k-nearest neighbors.")
parser.add_argument("--k", type=int, metavar="k")
parser.add_argument("--tau", type=float, metavar="tau")
parser.add_argument("--nloglr", type=float, metavar="-log10(beta)")
parser.add_argument("--method", type=str)
parser.add_argument("-startnew", action='store_true')
parser.add_argument("--dataset", type=str)
args = parser.parse_args()

dataset = args.dataset
split = DataSplit(dataset)
print(args)

k = args.k
tau = args.tau
NUM_TRAIN_QUERIES = 100
NUM_TEST_QUERIES = 10
NUM_TRAIN_NEIGHBORS = 100
LEARNING_RATE = 10 ** -args.nloglr
NUM_SAMPLES = 5
resume = not args.startnew
method = args.method
NUM_EPOCHS = 150 if dataset == 'cifar10' else 50
EMBEDDING_SIZE = 500 if dataset == 'mnist' else 512
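# Note that only the CUDA generator is seeded above; for reproducible runs on
# CPU as well, the global torch generator should be seeded too. A small helper
# covering all three libraries might look like this (a sketch, not part of the
# original script):
def seed_everything(seed: int) -> None:
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)           # seeds the CPU generator
    torch.cuda.manual_seed_all(seed)  # seeds every visible GPU

# usage: seed_everything(94305)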
parser.add_argument("--tau", type=float, metavar="tau", default=16.) parser.add_argument("--nloglr", type=float, metavar="-log10(beta)", default=3.) parser.add_argument("--method", type=str, default="deterministic") parser.add_argument("-resume", action='store_true') parser.add_argument("--dataset", type=str, required=True) parser.add_argument("--num_train_queries", type=int, default=100) # no effect on training, but massive effect on memory usage parser.add_argument("--num_test_queries", type=int, default=10) parser.add_argument("--num_train_neighbors", type=int, default=100) parser.add_argument("--num_samples", type=int, default=5) parser.add_argument("--num_epochs", type=int, default=200) args = parser.parse_args() dataset = args.dataset split = DataSplit(dataset) print(args) k = args.k tau = args.tau NUM_TRAIN_QUERIES = args.num_train_queries NUM_TEST_QUERIES = args.num_test_queries NUM_TRAIN_NEIGHBORS = args.num_train_neighbors LEARNING_RATE = 10**-args.nloglr NUM_SAMPLES = args.num_samples resume = args.resume method = args.method NUM_EPOCHS = args.num_epochs EMBEDDING_SIZE = 500 if dataset == 'mnist' else 512
import torch
from torch.utils import data

# Hyperparameters
input_size = (3, 80, 80)
output_size = 1
lr = 1e-4
momentum = 0.9
opt = torch.optim.SGD
batch_size = 32  # not defined in the original excerpt; value assumed here

# Data split (train/val/test percentages)
train, val, test = 70, 15, 15

# GPU and DATASET_DIR are assumed to be module-level configuration values.
if GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

train_dataset, val_dataset, test_dataset = DataSplit(
    DATASET_DIR, train, val, test, input_shape=input_size[1:]
).get_datasets(device)
train_generator = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_generator = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Initialize tensorboard
tensorboard = TBManager()

# Visualize some images of the training set
cat_samples = train_dataset.images[:10]  # the first images in the list are cats
dog_samples = train_dataset.images[-10:]  # assumed: the dog images sit at the end
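# Sketch of pushing the sample images to TensorBoard. `TBManager` is the
# project's own wrapper, so this uses the plain SummaryWriter API instead, and
# it assumes `train_dataset.images` holds tensors of shape (C, H, W):
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()
writer.add_images("samples/cats", torch.stack(cat_samples), global_step=0)
writer.add_images("samples/dogs", torch.stack(dog_samples), global_step=0)
writer.close()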