def main(args):
    """Train the CVAE and the regressor on a fixed 250-row subset.

    Builds a pretrained MnasNet feature extractor (eval mode), a CVAE whose
    encoder is put in eval mode, and a regressor. Optionally restores both
    trainable models from checkpoints, then hands everything to ``train``.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``n_class``, ``cvae_resume_model``, ``regressor_resume_model``,
            ``data_root``, ``metadata_file_name`` (a format string taking the
            subset), ``subset`` and ``batch_size``.
    """
    mnasnet = models.mnasnet1_0(pretrained=True).to(device).eval()

    cvae = CVAE(1000, 128, args.n_class * 2, args.n_class).to(device)
    cvae.encoder.eval()
    regressor = Regressor().to(device)

    # map_location keeps checkpoint loading working when the checkpoint was
    # saved on a different device (e.g. GPU checkpoint, CPU-only host).
    if Path(args.cvae_resume_model).exists():
        print("load cvae model:", args.cvae_resume_model)
        cvae.load_state_dict(
            torch.load(args.cvae_resume_model, map_location=device))

    if Path(args.regressor_resume_model).exists():
        print("load regressor model:", args.regressor_resume_model)
        regressor.load_state_dict(
            torch.load(args.regressor_resume_model, map_location=device))

    # Deterministic shuffle (fixed seed), then keep the first 250 rows.
    image_label = pandas.read_csv(
        Path(args.data_root, args.metadata_file_name.format(args.subset))
    ).sample(frac=1, random_state=551)[:250]
    # Shift labels down by one (presumably 1-based -> 0-based; confirm
    # against the metadata file).
    image_label["class"] = image_label["class"] - 1

    # image_label is already limited to 250 rows, so no second slice needed.
    dataset = WBCDataset(args.n_class,
                         image_label.values,
                         args.data_root,
                         subset=args.subset,
                         train=True)
    data_loader = loader(dataset, args.batch_size, True)

    cvae_optimizer = RAdam(cvae.parameters(), weight_decay=1e-3)
    regressor_optimizer = RAdam(regressor.parameters(), weight_decay=1e-3)

    train(args, mnasnet, cvae, regressor, cvae_optimizer,
          regressor_optimizer, data_loader)
def main(args):
    """Train a CVAE on the first 250 rows of the shuffled metadata table.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``n_class``, ``data_root``, ``metadata_file_name`` (a format
            string taking the subset), ``subset`` and ``batch_size``.
    """
    # Pretrained MnasNet, moved to the target device and put in eval mode.
    backbone = models.mnasnet1_0(pretrained=True)
    backbone = backbone.to(device).eval()

    cvae = CVAE(1000, 128, 128, args.n_class, 128)
    cvae = cvae.to(device)

    # Deterministic shuffle (fixed seed), then keep the first 250 rows.
    metadata_path = Path(args.data_root,
                         args.metadata_file_name.format(args.subset))
    shuffled = pandas.read_csv(metadata_path).sample(frac=1, random_state=551)
    image_label = shuffled[:250]
    # Labels are shifted down by one before being handed to the dataset.
    image_label["class"] = image_label["class"] - 1

    samples = WBCDataset(image_label.values, args.data_root,
                         subset=args.subset)
    batches = loader(samples, args.batch_size, True)

    radam = RAdam(cvae.parameters(), weight_decay=1e-3)

    train(args, backbone, cvae, radam, batches)
def main(args):
    """Evaluate a CVAE over the shuffled metadata table, one sample at a time.

    Builds a pretrained MnasNet and a CVAE (both in eval mode), restores the
    CVAE from ``args.resume_model`` when that file exists, and calls ``test``
    with a batch-size-1 loader.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``n_class``, ``resume_model``, ``data_root``,
            ``metadata_file_name`` (a format string taking the subset) and
            ``subset``.
    """
    mnasnet = models.mnasnet1_0(pretrained=True).to(device).eval()
    model = CVAE(1000, 128, 128, args.n_class, 128).to(device).eval()

    if Path(args.resume_model).exists():
        # NOTE(review): the message says "regressor" but the weights are
        # loaded into the CVAE -- confirm the wording is intended.
        print("load regressor model:", args.resume_model)
        # map_location keeps loading working when the checkpoint was saved on
        # a different device (e.g. GPU checkpoint, CPU-only host).
        model.load_state_dict(
            torch.load(args.resume_model, map_location=device))

    # Deterministic shuffle (fixed seed). A `[250:]` slice was disabled here,
    # so every row is evaluated.
    # NOTE(review): the disabled slice suggests the first 250 rows may be
    # reserved for training -- confirm evaluating them is intended.
    image_label = pandas.read_csv(
        Path(args.data_root, args.metadata_file_name.format(args.subset))
    ).sample(frac=1, random_state=551)
    # Labels are shifted down by one before being handed to the dataset.
    image_label["class"] = image_label["class"] - 1

    dataset = WBCDataset(image_label.values, args.data_root,
                         subset=args.subset)
    data_loader = loader(dataset, 1, False)

    test(args, mnasnet, model, data_loader)
def read(config, args, first_task=False):
    """
    Read experiment configuration, generate dataset and model.

    @param:
        config(configparser.ConfigParser): configuration object
        args(argparse.ArgumentParser): command line argument object
        first_task(bool): when True, use the fixed dropout / learning rate
            hard-coded below instead of the values carried by ``args``
    @return:
        dataset(DatasetWoz3): dataset to use
        model(nn.Module)
    @raise:
        ValueError: ``model_type`` in the config is neither "lm" nor "cvae"
        AssertionError: ``args.mode == "train"`` and a file already exists at
            the computed model path
    """
    print('Processing data...', file=sys.stderr)

    # Read settings from config.cfg
    model_type = config["MODEL"]["model_type"]
    decoder_type = config["MODEL"]["dec_type"]
    percentage = config.getfloat("MODEL", "train_percentage")
    data_split = config["DATA"]["data_split"]
    n_layer = config.getint("MODEL", "num_layer")
    hidden_size = config.getint("MODEL", "hidden_size")
    beam_size = config.getint("TESTING", "beam_size")
    experiment_prefix = (config["EXPERIMENT"]["experiment_prefix"]
                         + str(args.random_seed) + '/')
    experiment_type = config["EXPERIMENT"]["experiment"]

    # Get model settings for cvae
    if model_type == "cvae":
        latent_size = config.getint("MODEL", "latent_size")
        std = config.getfloat("MODEL", "std")

    if first_task:
        # The first task is trained with fixed hyper-parameters, independent
        # of the values passed on the command line.
        if model_type == 'lm':
            dropout = 0.25
            lr = 0.005
        else:  # cvae
            dropout = 0.25
            lr = 0.002
    else:
        # Read dropout and learning rate
        dropout = args.dropout if "dropout" in experiment_type else 0
        lr = args.lr

    # Add suffix to experiment type to indicate hyper parameter used.
    # NOTE(review): applied regardless of first_task -- confirm this matches
    # the intended nesting.
    if 'loss' in experiment_type:
        experiment_type = experiment_type + '_' + str(args.sv_len_weight)
    if 'distillation' in experiment_type:
        experiment_type = experiment_type + '_' + str(args._lambda)
    if 'ewc' in experiment_type:
        experiment_type = experiment_type + '_' + str(args.ewc_importance)
    if 'l2' in experiment_type:
        experiment_type = experiment_type + '_' + str(args.l2_weight)
    if 'dropout' in experiment_type:
        experiment_type = experiment_type + '_' + str(args.dropout)

    dataset = DatasetWoz3(config, data_split, percentage=percentage)

    # Get dataset parameter
    # len of 1-hot feature vector
    d_size = dataset.do_size + dataset.da_size + dataset.sv_size
    do_size = dataset.do_size  # number of domain
    da_size = dataset.da_size  # number of dialogue act
    sv_size = dataset.sv_size  # number of slot values
    vocab_size = len(dataset.word2index)  # vocabulary size

    # Construct model path to save/load the model
    model_path = construct_model_path(experiment_prefix, experiment_type,
                                      model_type)
    print(f"The model path is {model_path}", file=sys.stderr)
    print(f"The mode is {args.mode}", file=sys.stderr)

    # Initialize model
    if model_type == "lm":
        model = LM_deep(decoder_type, args, vocab_size, vocab_size,
                        hidden_size, d_size, n_layer=n_layer,
                        dropout=dropout, lr=lr)
    elif model_type == "cvae":
        model = CVAE(decoder_type, args, hidden_size, vocab_size,
                     latent_size, d_size, do_size, da_size, sv_size, std,
                     n_layer=n_layer, dropout=dropout, lr=lr)
    else:
        # Previously this fell through and failed later with an unbound
        # `model`; fail here with an explicit message instead.
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected 'lm' or 'cvae'")

    # Load model if recover/test mode
    if args.mode == "train":
        # A fresh training run must not overwrite an existing checkpoint.
        assert not os.path.isfile(model_path), \
            f"Model file already exists: {model_path}"
    elif args.mode == "recover":
        # Load the model specified by the task suffix for recovering training
        task_suffix = args.recovered_tasks
        # Drop the trailing 3 characters (presumably ".pt") before appending
        # the task suffix.
        model_path = f"{model_path[:-3]}_{task_suffix}.pt"
        print(f"Recovering from {model_path}", file=sys.stderr)
        # Load onto CPU first so a GPU-saved checkpoint can be read on a
        # CPU-only host; load_state_dict copies onto the model's device.
        state = torch.load(model_path, map_location="cpu")
        model.load_state_dict(state["model_state_dict"])
        # Move the model before restoring the optimizer so the optimizer
        # state is placed on the same device as the parameters.
        if USE_CUDA:
            model.to(torch.device("cuda"))
        model.solver.load_state_dict(state["optimizer_state_dict"])
    else:
        # Load the model specified by the task suffix for testing
        task_suffix = args.recovered_tasks
        model_path = f"{model_path[:-3]}_{task_suffix}.pt"
        print(f"Testing at {model_path}", file=sys.stderr)
        state = torch.load(model_path, map_location="cpu")
        model.load_state_dict(state["model_state_dict"])
        if args.mode != 'adapt':
            model.eval()

    # Print model info
    print('\n***** MODEL INFO *****')
    print('MODEL TYPE:', model_type)
    print('MODEL PATH:', model_path)
    print('SIZE OF HIDDEN:', hidden_size)
    print('# of LAYER:', n_layer)
    print('SAMPLE/BEAM SIZE:', beam_size)
    print('*************************\n')

    # Move models to GPU
    if USE_CUDA:
        model.cuda()

    return dataset, model
def main():
    """Train the CVAE on MNIST and periodically log reconstructions/accuracy.

    Reads hyper-parameters from ``architecture.json``, builds the TensorFlow
    graph, then runs ``epoch`` passes over the unlabeled set. At the last
    iteration of every epoch a reconstruction image and the predicted labels
    are written to ``args.logdir``. At the first iteration of every
    ``summary_freq``-th epoch the test set is classified and a confusion
    matrix plus accuracy are appended to ``log.txt``. The model is saved in
    the ``finally`` clause, including on Ctrl-C.

    Relies on the module-level ``args`` namespace (``logdir``, ``datadir``,
    ``gpu_cfg``) and the module-level ``N_TEST``.
    """
    # makedirs also creates missing parent directories.
    if not os.path.isdir(args.logdir):
        os.makedirs(args.logdir)

    with open('architecture.json') as f:
        arch = json.load(f)

    dataset = MNISTLoader(args.datadir)
    dataset.divide_semisupervised(N_u=arch['training']['num_unlabeled'])
    x_s, y_s = dataset.pick_supervised_samples(
        smp_per_class=arch['training']['smp_per_class'])
    x_u = dataset.x_u
    x_t, y_t = dataset.x_t, dataset.y_t
    # One sample per class, used for the style-conversion thumbnail.
    x_1, _ = dataset.pick_supervised_samples(smp_per_class=1)

    x_l_show = reshape(x_s, 10)
    imshow([x_l_show], os.path.join(args.logdir, 'x_labeled.png'))

    batch_size = arch['training']['batch_size']
    N_EPOCH = arch['training']['epoch']
    N_ITER = x_u.shape[0] // batch_size
    N_HALFLIFE = arch['training']['halflife']

    # ---- Graph construction ----
    h, w, c = arch['hwc']
    X_u = tf.placeholder(shape=[None, h, w, c], dtype=tf.float32)
    X_l = tf.constant(x_s)
    Y_l = tf.one_hot(y_s, arch['y_dim'])

    net = CVAE(arch)
    loss = net.loss(X_u, X_l, Y_l)

    encodings = net.encode(X_u)
    Z_u = encodings['mu']
    Y_u = encodings['y']
    Xh = net.decode(Z_u, Y_u)

    # Second reconstruction decodes from the one-hot of the arg-max label.
    label_pred = tf.argmax(Y_u, 1)
    Y_pred = tf.one_hot(label_pred, arch['y_dim'])
    Xh2 = net.decode(Z_u, Y_pred)

    thumbnail = make_thumbnail(Y_u, Z_u, arch, net)

    opt = get_optimization_ops(loss, arch=arch)

    # ---- Session ----
    if args.gpu_cfg:
        with open(args.gpu_cfg) as f:
            cfg = json.load(f)
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg[
                'per_process_gpu_memory_fraction'])
        session_conf = tf.ConfigProto(
            allow_soft_placement=cfg['allow_soft_placement'],
            log_device_placement=cfg['log_device_placement'],
            inter_op_parallelism_threads=cfg['inter_op_parallelism_threads'],
            intra_op_parallelism_threads=cfg['intra_op_parallelism_threads'],
            gpu_options=gpu_options)
        sess = tf.Session(config=session_conf)
    else:
        sess_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True))
        sess = tf.Session(config=sess_config)

    init = tf.global_variables_initializer()
    sess.run(init)

    # TODO: summary writer / merged summaries (TensorBoard) are not wired up.
    saver = tf.train.Saver()

    # ===============================
    # [TODO]
    #   1. batcher class
    #      1) for train and for test
    #      2) binarization
    #      3) shuffle as arg
    #   5. TBoard (training tracker to monitor the convergence)
    # ===============================

    sqrt_bz = int(np.sqrt(batch_size))

    logfile = os.path.join(args.logdir, 'log.txt')

    try:
        step = 0
        for ep in range(N_EPOCH):
            np.random.shuffle(x_u)  # shuffle (in place)

            for it in range(N_ITER):
                step = ep * N_ITER + it

                tau = halflife(
                    step,
                    N0=arch['training']['largest_tau'],
                    T_half=N_ITER * N_HALFLIFE,
                    thresh=arch['training']['smallest_tau'])

                # Sample a binary batch, using pixel values as probabilities.
                start = it * batch_size
                batch = np.random.binomial(1, x_u[start:start + batch_size])

                _, l_x, l_z, l_y, l_l = sess.run(
                    [opt['g'], loss['Dis'], loss['KL(z)'], loss['H(y)'],
                     loss['Labeled']],
                    {X_u: batch, net.tau: tau})

                msg = 'Ep [{:03d}/{:d}]-It[{:03d}/{:d}]: Lx: {:6.2f}, KL(z): {:4.2f}, L:{:.2e}: H(u): {:.2e}'.format(
                    ep, N_EPOCH, it, N_ITER, l_x, l_z, l_l, l_y)
                print(msg)

                # End of epoch: dump reconstructions and predicted labels.
                if it == (N_ITER - 1):
                    b, y, xh, xh2 = sess.run(
                        [X_u, Y_u, Xh, Xh2],
                        {X_u: batch, net.tau: tau})

                    b = reshape(b, sqrt_bz)
                    xh = reshape(xh, sqrt_bz)
                    xh2 = reshape(xh2, sqrt_bz)

                    y = np.argmax(y, 1).astype(np.int32)
                    y = np.reshape(y, [sqrt_bz, sqrt_bz])

                    png = os.path.join(
                        args.logdir, 'Ep-{:03d}-reconst.png'.format(ep))
                    with open(logfile, 'a') as f:
                        f.write(png + ' ')
                        f.write('Tau: {:.3f}\n'.format(tau[0]))
                        f.write(msg + '\n')
                        n, m = y.shape
                        for i in range(n):
                            for j in range(m):
                                f.write('{:d} '.format(y[i, j]))
                            f.write('\n')
                        f.write('\n\n')

                    imshow(
                        img_list=[b, xh, xh2],
                        filename=png,
                        titles=['Ground-truth',
                                'Reconstructed using dense label',
                                'Reconstructed using onehot label'])

                # Periodic evaluation, at the first iteration of the epoch.
                if it == 0 and ep % arch['training']['summary_freq'] == 0:
                    # ==== Classification ====
                    y_p = list()
                    bz = 100
                    for i in range(N_TEST // bz):
                        # Copy so binarization does not overwrite the test
                        # set through the slice view.
                        b_t = x_t[i * bz: (i + 1) * bz].copy()
                        b_t[b_t > 0.5] = 1.0  # [MAKESHIFT] Binarization
                        b_t[b_t <= 0.5] = 0.0
                        p = sess.run(
                            label_pred,
                            {X_u: b_t, net.tau: tau})
                        y_p.append(p)
                    y_p = np.concatenate(y_p, 0)
                    # Only whole batches are predicted; compare against the
                    # matching prefix of the ground truth.
                    y_true = y_t[:len(y_p)]

                    # ==== Style Conversion ====
                    x_converted = sess.run(
                        thumbnail,
                        {X_u: x_1, Y_u: np.eye(arch['y_dim'])})
                    imshow(
                        img_list=[x_converted],
                        filename=os.path.join(
                            args.logdir, 'Ep-{:03d}-conv.png'.format(ep)))

                    # == Confusion Matrix ==
                    with open(logfile, 'a') as f:
                        cm = metrics.confusion_matrix(y_true, y_p)
                        n, m = cm.shape
                        for i in range(n):
                            for j in range(m):
                                f.write('{:4d} '.format(cm[i, j]))
                            f.write('\n')
                        acc = metrics.accuracy_score(y_true, y_p)
                        f.write('Accuracy: {:.4f}\n'.format(acc))
                        f.write('\n\n')
    except KeyboardInterrupt:
        print('Aborted')
    finally:
        # Always persist the latest weights, including on Ctrl-C.
        save(saver, sess, args.logdir, step)
# Output locations for this experiment run.
model_config["exp_logs_dir"] = exp_logs
model_config["exp_save_models_dir"] = exp_saved_models

# file path for gradient checking and plotting (disabled)
# file_loc = "/afs/inf.ed.ac.uk/user/s18/s1890219/Thesis/CVAE/experiments/cvae/output_logs/analysis_1/"
# model_config["file_loc"] = file_loc

# The key name says what the model evaluates on; here the "test" slot is
# filled with the validation interactions.
model_config["test_user_item_interaction_dict"] = (
    val_user_item_interaction_dict
)
model_config["train_user_item_interaction_dict"] = (
    train_user_item_interaction_dict
)

##### define the model #####
if args.model_type == "cvae":
    model = CVAE(config=model_config).to(device)
print(model)

# Cross-entropy objective (an MSE loss was tried previously and disabled).
# criterion = torch.nn.MSELoss()
criterion = torch.nn.CrossEntropyLoss()
# Setting size_average to False would sum the losses over each minibatch.
# criterion.size_average = False

learning_rate = 1e-4
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=1e-5,
)

# train the model
if args.exp_type == "train":
    train.train_and_val(
        model,
        train_dataloader,
        val_dataloader,
        criterion,
        optimizer,
        args,
        model_config,
    )
test_dataset = CelebA(label=opts.label, path=opts.path, train=False, transform=transforms.ToTensor()) dataloader = { 'train': torch.utils.data.DataLoader(train_dataset, batch_size=opts.batch_size, shuffle=True), 'test': torch.utils.data.DataLoader(test_dataset, batch_size=opts.batch_size, shuffle=False) } cvae = CVAE(opts.latent_size, device).to(device) dis = Discriminator().to(device) classifier = Classifier(opts.latent_size).to(device) classer = CLASSIFIERS().to(device) print(cvae) print(dis) print(classifier) optimizer_cvae = torch.optim.Adam(cvae.parameters(), lr=opts.lr, betas=(opts.b1, opts.b2), weight_decay=opts.weight_decay) optimizer_dis = torch.optim.Adam(dis.parameters(), lr=opts.lr, betas=(opts.b1, opts.b2),