def main():
    """Visualize the VAE latent space of the test set with a 2-D t-SNE plot.

    Loads the trained model from ``args.saved_path``, encodes every test
    sample into the latent space via the reparameterization trick, projects
    the latent codes to 2-D with t-SNE, and saves a scatter plot colored by
    class label to ``latent.png``.
    """
    test_dataset = get_dataset(args.dataset, train=False)
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

    # Infer the input shape from a single sample.
    img, _ = test_dataset[0]
    img_shape = img.shape
    if args.model == 'fc':
        input_size = torch.flatten(img).shape[0]
        model = FC(input_size=input_size, z_size=args.latent_dim).to(device)
    elif args.model == 'cnn':
        model = CNN(img_shape=img_shape, z_size=args.latent_dim).to(device)
    else:
        # Fail fast instead of hitting an UnboundLocalError on `model` below.
        raise ValueError(f"Unsupported model type: {args.model}")

    model.load_state_dict(torch.load(args.saved_path, map_location=device))
    model.eval()

    z_batches = []
    y_labels = []
    with torch.no_grad():
        for xs, ys in test_loader:
            xs = xs.to(device)
            _, mu, logvar = model(xs)
            # Reparameterization trick: z = mu + eps * sigma.
            std = torch.exp(0.5 * logvar)
            eps = torch.randn_like(std)
            zs = mu + eps * std
            # No .clone()/.detach() needed: nothing tracks grad under no_grad().
            z_batches.append(zs.cpu())
            y_labels.extend(ys.cpu().numpy())

    # Stack batches into one (N, latent_dim) array before handing it to t-SNE.
    z_preds = torch.cat(z_batches).numpy()
    tsne = TSNE(n_components=2, perplexity=30, n_iter=3000, init='pca')
    tsne_results = tsne.fit_transform(z_preds)
    z1 = tsne_results[:, 0]
    z2 = tsne_results[:, 1]
    df = pd.DataFrame(list(zip(z1, z2, y_labels)), columns=['z1', 'z2', 'y'])
    sns_plot = sns.scatterplot(x='z1', y='z2', hue='y',
                               palette=sns.color_palette('hls', 10),
                               data=df, legend='full')
    sns_plot.figure.savefig('latent.png')
def main():
    """Sample random latent codes from the prior and save decoded images.

    Loads the trained model from ``args.saved_path``, draws ``NUM_SAMPLES``
    latent vectors from N(0, I), decodes them, undoes dataset normalization,
    and writes an image grid to ``<dataset>_<model>_random_sample.png``.
    """
    test_dataset = get_dataset(args.dataset, train=False)

    # Infer the input shape from a single sample.
    img, _ = test_dataset[0]
    img_shape = img.shape
    if args.model == 'fc':
        input_size = torch.flatten(img).shape[0]
        model = FC(input_size=input_size, z_size=args.latent_dim).to(device)
    elif args.model == 'cnn':
        model = CNN(img_shape=img_shape, z_size=args.latent_dim).to(device)
    else:
        # Fail fast instead of hitting an UnboundLocalError on `model` below.
        raise ValueError(f"Unsupported model type: {args.model}")

    model.load_state_dict(torch.load(args.saved_path, map_location=device))
    model.eval()

    with torch.no_grad():
        # Draw latent codes from the standard-normal prior and decode them.
        z = torch.randn(NUM_SAMPLES, args.latent_dim).to(device)
        preds = model.decode(z)
        # .detach() is redundant under no_grad(); .cpu() alone suffices.
        preds = preds.cpu().view(-1, *img_shape)
        # Undo dataset normalization so pixel values are displayable.
        preds = inv_normalize(preds, args.dataset)

    save_image(preds, f'{args.dataset}_{args.model}_random_sample.png', nrow=NROW)
def main_worker(gpu, ngpus_per_node, args, config):
    """Per-GPU training worker (the only process in single-GPU mode).

    Sets up logging, (optionally) the NCCL process group, data loaders,
    network, optimizer and scheduler, then runs the train/test loop and
    checkpoints the latest and best models (rank 0 only in DDP).

    Args:
        gpu: Local GPU index this worker runs on; only gpu 0 logs.
        ngpus_per_node: GPUs on this node; used to split the batch size.
        args: CLI arguments (distributed flags, resume, dirs, amp, ...).
        config: Parsed training configuration dictionary.
    """
    set_seed(**config["seed"])
    logger = get_loguru_logger(args.log_dir, resume=args.resume, is_rank0=(gpu == 0))
    start_time = time.asctime(time.localtime(time.time()))
    logger.info("Start at: {} at: {}".format(start_time, platform.node()))

    torch.cuda.set_device(gpu)
    if args.distributed:
        # Global rank = node rank * GPUs per node + local GPU index.
        args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(
            backend="nccl",
            init_method="tcp://127.0.0.1:{}".format(args.dist_port),
            world_size=args.world_size,
            rank=args.rank,
        )
        logger.warning("Only log rank 0 in distributed training!")

    logger.info("===Prepare data===")
    if "torch_transforms" in config:
        train_transform = TorchTransforms(config["torch_transforms"]["train"])
        test_transform = TorchTransforms(config["torch_transforms"]["test"])
    else:
        train_transform, test_transform = None, None
    logger.info("Torch training transformations:\n{}".format(train_transform))
    logger.info("Torch test transformations:\n{}".format(test_transform))
    logger.info("Load dataset from: {}".format(config["dataset_dir"]))
    train_data = get_dataset(config["dataset_dir"], train_transform)
    test_data = get_dataset(config["dataset_dir"], test_transform, train=False)
    prefetch = "prefetch" in config and config["prefetch"]
    logger.info("Prefetch: {}".format(prefetch))

    if args.distributed:
        train_sampler = DistributedSampler(train_data)
        # Divide batch size equally among multiple GPUs,
        # to keep the same learning rate used in a single GPU.
        batch_size = int(config["loader"]["batch_size"] / ngpus_per_node)
        num_workers = config["loader"]["num_workers"]
        train_loader = get_loader(
            train_data,
            prefetch=prefetch,
            batch_size=batch_size,
            sampler=train_sampler,
            num_workers=num_workers,
        )
    else:
        train_sampler = None
        train_loader = get_loader(
            train_data, prefetch=prefetch, loader_config=config["loader"], shuffle=True
        )
    test_loader = get_loader(
        test_data, prefetch=prefetch, loader_config=config["loader"]
    )

    logger.info("\n===Setup training===")
    model = get_network(config["network"])
    logger.info("Create network: {}".format(config["network"]))
    model = model.cuda(gpu)
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda(gpu)
    logger.info("Create criterion: {}".format(criterion))
    optimizer = get_optimizer(model, config["optimizer"])
    logger.info("Create optimizer: {}".format(optimizer))
    scheduler = get_scheduler(optimizer, config["lr_scheduler"])
    logger.info("Create scheduler: {}".format(config["lr_scheduler"]))
    resumed_epoch, best_acc, best_epoch = resume_state(
        model,
        args.resume,
        args.ckpt_dir,
        logger,
        optimizer=optimizer,
        scheduler=scheduler,
        is_best=True,
    )
    if args.distributed:
        # Convert BatchNorm*D layer to SyncBatchNorm before wrapping Network with DDP.
        if "sync_bn" in config and config["sync_bn"]:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
            logger.info("Turn on synchronized batch normalization in ddp.")
        model = DistributedDataParallel(model, device_ids=[gpu])

    for epoch in range(config["num_epochs"] - resumed_epoch):
        if args.distributed:
            # Reseed the sampler so each epoch sees a fresh shard permutation.
            train_sampler.set_epoch(epoch)
        # 1-based global epoch index, including the resume offset.
        global_epoch = epoch + resumed_epoch + 1
        logger.info("===Epoch: {}/{}===".format(global_epoch, config["num_epochs"]))
        logger.info("Training...")
        train_result = train(
            model, train_loader, criterion, optimizer, logger, amp=args.amp,
        )
        logger.info("Test...")
        test_result = test(model, test_loader, criterion, logger)
        if scheduler is not None:
            scheduler.step()
            logger.info(
                "Adjust learning rate to {}".format(optimizer.param_groups[0]["lr"])
            )

        # Save result and checkpoint (rank 0 only in distributed mode).
        if not args.distributed or (args.distributed and gpu == 0):
            result = {"train": train_result, "test": test_result}
            result2csv(result, args.log_dir)
            # Update the best-accuracy bookkeeping BEFORE building the
            # checkpoint dict, so best_model.pt records the accuracy that
            # made it best (previously it saved the stale previous best).
            is_best = False
            if test_result["acc"] > best_acc:
                is_best = True
                best_acc = test_result["acc"]
                best_epoch = global_epoch
            logger.info(
                "Best test accuracy {} in epoch {}".format(best_acc, best_epoch)
            )
            saved_dict = {
                "epoch": global_epoch,
                "result": result,
                "optimizer_state_dict": optimizer.state_dict(),
                "best_acc": best_acc,
                "best_epoch": best_epoch,
            }
            # DP/DDP wrappers hold the real network in .module.
            if "parallel" not in str(type(model)):
                saved_dict["model_state_dict"] = model.state_dict()
            else:
                saved_dict["model_state_dict"] = model.module.state_dict()
            if scheduler is not None:
                saved_dict["scheduler_state_dict"] = scheduler.state_dict()
            if is_best:
                ckpt_path = os.path.join(args.ckpt_dir, "best_model.pt")
                torch.save(saved_dict, ckpt_path)
                logger.info("Save the best model to {}".format(ckpt_path))
            ckpt_path = os.path.join(args.ckpt_dir, "latest_model.pt")
            torch.save(saved_dict, ckpt_path)
            logger.info("Save the latest model to {}".format(ckpt_path))

    end_time = time.asctime(time.localtime(time.time()))
    logger.info("End at: {} at: {}".format(end_time, platform.node()))
def main():
    """Train a VAE, optionally save its weights, and write learning curves.

    Runs ``args.num_epochs`` epochs of train/test, collects total,
    reconstruction, and KL losses per epoch, saves the model state dict when
    ``args.save_model`` is set, and writes a long-format CSV of the losses.
    """
    train_dataset = get_dataset(args.dataset, train=True)
    test_dataset = get_dataset(args.dataset, train=False)

    cuda_kwargs = {}
    if torch.cuda.is_available():
        cuda_kwargs = {'num_workers': 4, 'pin_memory': True}
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True, **cuda_kwargs)
    # Evaluation order doesn't matter; shuffle=False keeps it deterministic
    # (was shuffle=True, which added needless nondeterminism).
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             shuffle=False, **cuda_kwargs)

    # Infer the input shape from a single sample.
    img, _ = train_dataset[0]
    img_shape = img.shape
    if args.model == 'fc':
        input_size = torch.flatten(img).shape[0]
        model = FC(input_size=input_size, z_size=args.latent_dim).to(device)
    elif args.model == 'cnn':
        model = CNN(img_shape=img_shape, z_size=args.latent_dim).to(device)
    else:
        # Fail fast instead of hitting an UnboundLocalError on `model` below.
        raise ValueError(f"Unsupported model type: {args.model}")

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=args.scheduler_step,
                                          gamma=args.scheduler_gamma)

    train_losses, train_recon_losses, train_kl_losses = [], [], []
    test_losses, test_recon_losses, test_kl_losses = [], [], []

    print("======> Start training")
    for epoch in range(1, args.num_epochs + 1):
        train_loss, train_recon_loss, train_kl_loss = train(
            epoch, model, img_shape, optimizer, train_loader)
        test_loss, test_recon_loss, test_kl_loss = test(
            epoch, model, img_shape, test_loader)
        train_losses.append(train_loss)
        train_recon_losses.append(train_recon_loss)
        train_kl_losses.append(train_kl_loss)
        test_losses.append(test_loss)
        test_recon_losses.append(test_recon_loss)
        test_kl_losses.append(test_kl_loss)
        scheduler.step()

    if args.save_model:
        model_save_path = (args.model + '_' + args.dataset + ".pt"
                           if args.save_path is None else args.save_path)
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved at {model_save_path}")

    # Persist per-epoch losses as a long-format CSV (one row per split/epoch).
    train_split = ['train'] * args.num_epochs
    test_split = ['test'] * args.num_epochs
    epochs = list(range(1, args.num_epochs + 1))
    columns = ['Split', 'Epochs', 'Loss', 'Reconstruction_Loss', 'KL_Loss']
    train_df = pd.DataFrame(
        list(zip(train_split, epochs, train_losses, train_recon_losses,
                 train_kl_losses)),
        columns=columns)
    test_df = pd.DataFrame(
        list(zip(test_split, epochs, test_losses, test_recon_losses,
                 test_kl_losses)),
        columns=columns)
    learning_curve_csv = (args.learning_curve_csv
                          if args.learning_curve_csv is not None
                          else f'{args.model}_' + args.dataset + '.csv')
    learning_curve_df = pd.concat([train_df, test_df], ignore_index=True)
    learning_curve_df.to_csv(learning_curve_csv, mode='w')
    print(f"Learning curve saved at: {learning_curve_csv}")
# Smoke-test the project dataset loader on the TCGA-GBM dataset.
# NOTE(review): the positional zeros look like placeholder arguments that
# this dataset ignores — verify against data.utils.get_dataset's signature.
from data import utils
utils.get_dataset(0, 0, 0, 10, "tcga-gbm", 0, {})
# %% alphabet = Alphabet("data/english_alphabet.txt") dataloader = get_dataloader("librispeech", batch_size=8, use_cuda=False, alphabet=alphabet, n_features=128, split="train") # %% X, y, X_lens, y_lens = next(iter(dataloader)) # %% import matplotlib.pyplot as plt from data.utils import get_dataset data_dir = "librispeech" dataset = get_dataset(data_dir, download=True, split="train", as_audiodataset=True) # %% sample = dataset[0] sample.plot(kind="mfcc", n_features=12) plt.show() #sample.plot(kind="waveform") #plt.show() # %% sample.featurize(5) features = sample.features.numpy().squeeze()