def train(args, train_dataloader, valid_dataloader):
    torch.manual_seed(87)
    torch.cuda.manual_seed(87)
    autoencoder = cc(CVAE())
    criterion = torch.nn.MSELoss()
    opt = torch.optim.Adam(autoencoder.parameters(), lr=args.lr)
    best_loss = 1e100
    for epoch in range(args.epoch):
        print(f' Epoch {epoch}')
        loss = _run_train(autoencoder, criterion, opt, train_dataloader)
        print('\t [Info] Avg training loss:{:.5f}'.format(
            loss / len(train_dataloader.dataset)))
        loss = _run_eval(autoencoder, criterion, valid_dataloader)
        print('\t [Info] Avg valid loss:{:.5f}'.format(
            loss / len(valid_dataloader.dataset)))
        # The `True or` short-circuit makes this guard always pass, so a
        # checkpoint is saved every epoch; drop it to save only on improvement.
        if True or loss < best_loss:
            best_loss = loss
            save_path = "{}/epoch_{}_loss_{:.4f}".format(
                args.save_path, epoch, loss / len(valid_dataloader.dataset))
            torch.save({'state_dict': autoencoder.state_dict()},
                       f"{save_path}_autoencoder_.pt")
            print(f'\t [Info] save weights at {save_path}')
        # Placeholder for learning-rate decay (dividing by 1 leaves lr unchanged)
        for param_group in opt.param_groups:
            param_group['lr'] = param_group['lr'] / 1
        print('-----------------------------------------------')
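# --- Added sketch (not from the original file) ---
# The loop above calls `_run_train` and `_run_eval`, which are not shown
# here. A minimal sketch of what they might look like, assuming the
# dataloader yields (input, condition) pairs, that `cc` also moves tensors
# to the right device, and that the CVAE is trained as a plain autoencoder
# under the MSE criterion used above (the forward signature is hypothetical):
def _run_train(model, criterion, opt, dataloader):
    model.train()
    total_loss = 0.0
    for x, c in dataloader:
        x, c = cc(x), cc(c)
        recon = model(x, c)  # hypothetical forward signature
        loss = criterion(recon, x)
        opt.zero_grad()
        loss.backward()
        opt.step()
        total_loss += loss.item() * x.size(0)
    return total_loss


def _run_eval(model, criterion, dataloader):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for x, c in dataloader:
            x, c = cc(x), cc(c)
            recon = model(x, c)
            total_loss += criterion(recon, x).item() * x.size(0)
    return total_loss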
def main(args):
    # Check if the output folder exists
    if not os.path.exists(args.folder):
        os.mkdir(args.folder)

    # Load model
    model = CVAE().cuda() if torch.cuda.is_available() else CVAE()
    model.load_state_dict(torch.load(os.path.join(args.folder, 'cvae.pth')))

    # Generate
    sample = torch.randn(args.num, 20)
    label = torch.from_numpy(np.asarray([args.digits] * args.num))
    sample = Variable(sample).cuda() if torch.cuda.is_available() else Variable(sample)
    sample = model.decode(sample, label).cpu()
    save_image(sample.view(args.num, 1, 28, 28).data,
               os.path.join(args.folder, 'generate.png'),
               nrow=10)
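# --- Added sketch (not from the original file) ---
# A possible argparse entry point for the generation script above; the flag
# names are inferred from the attributes used (`folder`, `num`, `digits`),
# and the defaults are illustrative:
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Generate digits with a trained CVAE')
    parser.add_argument('--folder', type=str, default='./output')
    parser.add_argument('--num', type=int, default=100, help='number of samples')
    parser.add_argument('--digits', type=int, default=0, help='digit class to generate')
    main(parser.parse_args())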
def __init__(self, run_id=1, output_path="", env_path_root=""):
    super().__init__()
    self.cvae = CVAE(run_id=run_id)
    self.device = torch.device('cuda' if CUDA_AVAILABLE else 'cpu')
    self.output_path = output_path
    self.env_path_root = env_path_root
    if self.output_path is not None:
        if os.path.exists(self.output_path):
            shutil.rmtree(self.output_path)
        os.mkdir(self.output_path)
def main(args):
    # Check if the output folder exists
    if not os.path.exists(args.folder):
        os.mkdir(args.folder)

    # Load data
    torch.manual_seed(args.seed)
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.ToTensor()),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    # Load model
    model = CVAE().cuda() if torch.cuda.is_available() else CVAE()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Train and generate a sample every epoch
    loss_list = []
    for epoch in range(1, args.epochs + 1):
        model.train()
        _loss = train(epoch, model, train_loader, optimizer)
        loss_list.append(_loss)

        model.eval()
        sample = torch.randn(100, 20)
        label = torch.from_numpy(np.asarray(list(range(10)) * 10))
        sample = Variable(sample).cuda() if torch.cuda.is_available() else Variable(sample)
        sample = model.decode(sample, label).cpu()
        save_image(sample.view(100, 1, 28, 28).data,
                   os.path.join(args.folder, 'sample_' + str(epoch) + '.png'),
                   nrow=10)

    plt.plot(range(len(loss_list)), loss_list, '-o')
    plt.savefig(os.path.join(args.folder, 'cvae_loss_curve.png'))
    torch.save(model.state_dict(), os.path.join(args.folder, 'cvae.pth'))
def main(argv):
    manager = DataManager(flags.image_dir)
    # manager.load()
    sess = tf.Session()
    model = CVAE(gamma=flags.gamma,
                 capacity_limit=flags.capacity_limit,
                 capacity_change_duration=flags.capacity_change_duration,
                 learning_rate=flags.learning_rate)
    sess.run(tf.global_variables_initializer())
    saver = load_checkpoints(sess)

    if flags.training:
        print("Training")
        # Train
        train(sess, model, manager, saver)
        # train(sess, model, imcrop, saver)
    else:
        print("Not training")  # placeholder branch (the original printed "jejeje")
train = pd.read_csv("dataset/VAE_Train+.csv")
test = pd.read_csv("dataset/VAE_Test+.csv")

trainx, trainy = np.array(
    train[train.columns[train.columns != "class"]]), np.array(
        pd.get_dummies(train["class"]))
testx, testy = np.array(
    test[train.columns[train.columns != "class"]]), np.array(
        pd.get_dummies(test["class"]))

batch_size = 512
max_epoch = 100
train_N = len(train)
test_N = len(test)
gpu = False
device = "cuda" if gpu else "cpu"

model = CVAE()
if gpu:
    model = model.cuda()
opt = optim.Adadelta(model.parameters(), lr=1e-3)


def Loss_function(x_hat, x, mu, logsigma):
    # `size_average=False` is deprecated; reduction='sum' is the equivalent
    reconstruction_loss = F.binary_cross_entropy(x_hat, x, reduction='sum')
    KL_div = -0.5 * th.sum(1 + logsigma - mu.pow(2) - logsigma.exp())
    return reconstruction_loss + KL_div


def create_batch(x, y):
    a = list(range(len(x)))
    np.random.shuffle(a)
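# --- Added sketch (not from the original file) ---
# Sanity check of the closed-form KL term in Loss_function above: for a
# diagonal Gaussian q = N(mu, sigma^2) against the standard normal prior,
# -0.5 * sum(1 + log sigma^2 - mu^2 - sigma^2) matches
# torch.distributions.kl_divergence (here `logsigma` holds log sigma^2):
import torch
from torch.distributions import Normal, kl_divergence

mu, logsigma = torch.randn(4, 8), torch.randn(4, 8)
closed_form = -0.5 * torch.sum(1 + logsigma - mu.pow(2) - logsigma.exp())
reference = kl_divergence(Normal(mu, (0.5 * logsigma).exp()),
                          Normal(torch.zeros_like(mu),
                                 torch.ones_like(logsigma))).sum()
assert torch.allclose(closed_form, reference, atol=1e-5)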
    _, c_pairs = readLangs("test_c", "./data/test_c.txt")
    total_bleu_score = 0.0
    for pair, c_pair in zip(pairs, c_pairs):
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(model, c_pair, pair[0])
        output_sentence = ''.join(output_words)
        print('<', output_sentence)
        print('')
        total_bleu_score += bleu_score(pair[1], output_sentence)
    _bleu_score = total_bleu_score / len(pairs)
    print(f"Bleu Score: {_bleu_score}")
    return _bleu_score


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tense2index = {'sp': 0, 'tp': 1, 'pg': 2, 'p': 3}
latent_size = 32
hidden_size = 256
input_lang = torch.load("./lang_class.pth")
checkpoint = torch.load("./checkpoint/0.7902374299152759_61208.pth")
model = CVAE(28, hidden_size, latent_size, 28).to(device)
model.load_state_dict(checkpoint['state_dict'])
evaluateByTestData(model)
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    pad_idx = vocab.word2idx['<pad>']
    sos_idx = vocab.word2idx['<start>']
    eos_idx = vocab.word2idx['<end>']
    unk_idx = vocab.word2idx['<unk>']

    # Build data loader
    train_data_loader, valid_data_loader = get_loader(
        args.train_image_dir, args.val_image_dir, args.train_caption_path,
        args.val_caption_path, vocab, args.batch_size,
        shuffle=True, num_workers=args.num_workers)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            # return float(1 / (1 + np.exp(-k * (step - x0))))
            return float(expit(k * (step - x0)))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    nll = torch.nn.NLLLoss(ignore_index=pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):
        # Cut off unnecessary padding from target, and flatten
        # (`.data[0]` is legacy PyTorch; `.item()` is the modern equivalent)
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative log-likelihood
        nll_loss = nll(logp, target)

        # KL divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return nll_loss, KL_loss, KL_weight

    # Build the models
    model = CVAE(vocab_size=len(vocab),
                 embedding_size=args.embedding_size,
                 rnn_type=args.rnn_type,
                 hidden_size=args.hidden_size,
                 word_dropout=args.word_dropout,
                 embedding_dropout=args.embedding_dropout,
                 latent_size=args.latent_size,
                 max_sequence_length=args.max_sequence_length,
                 num_layers=args.num_layers,
                 bidirectional=args.bidirectional,
                 pad_idx=pad_idx,
                 sos_idx=sos_idx,
                 eos_idx=eos_idx,
                 unk_idx=unk_idx)
    model.to(device)

    # Loss and optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # Train the models
    total_step = len(train_data_loader)
    step_for_kl_annealing = 0
    best_valid_loss = float("inf")
    patience = 0
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(train_data_loader):
            # Set mini-batch dataset
            images = images.to(device)
            captions_src = captions[:, :captions.size()[1] - 1]
            captions_tgt = captions[:, 1:]
            captions_src = captions_src.to(device)
            captions_tgt = captions_tgt.to(device)
            lengths = lengths - 1
            lengths = lengths.to(device)

            # Forward pass
            logp, mean, logv, z = model(images, captions_src, lengths)

            # Loss calculation
            NLL_loss, KL_loss, KL_weight = loss_fn(logp, captions_tgt, lengths,
                                                   mean, logv,
                                                   args.anneal_function,
                                                   step_for_kl_annealing,
                                                   args.k, args.x0)
            loss = (NLL_loss + KL_weight * KL_loss) / args.batch_size

            # Backward pass + optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            step_for_kl_annealing += 1

            # Print log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                      .format(epoch, args.num_epochs, i, total_step,
                              loss.item(), np.exp(loss.item())))
                outputs = model._sample(logp)
                outputs = outputs.cpu().numpy()

                # Convert word_ids to words
                sampled_caption = []
                ground_truth_caption = []
                for word_id in outputs[-1]:
                    word = vocab.idx2word[word_id]
                    sampled_caption.append(word)
                    if word == '<end>':
                        break
                captions_tgt = captions_tgt.cpu().numpy()
                for word_id in captions_tgt[-1]:
                    word = vocab.idx2word[word_id]
                    ground_truth_caption.append(word)
                    if word == '<end>':
                        break
                reconstructed = ' '.join(sampled_caption)
                ground_truth = ' '.join(ground_truth_caption)
                print("ground_truth: {0} \n reconstructed: {1}\n".format(
                    ground_truth, reconstructed))
            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(model.state_dict(),
                           os.path.join(args.model_path,
                                        'model-{}-{}.ckpt'.format(epoch + 1, i + 1)))

        torch.save(model.state_dict(),
                   os.path.join(args.model_path,
                                'model-{}-epoch.ckpt'.format(epoch + 1)))

        # Check against the validation set and early-stop if the validation
        # score does not improve within the patience period
        valid_loss = 0
        for j, (images, captions, lengths) in enumerate(valid_data_loader):
            # Set mini-batch dataset
            images = images.to(device)
            captions_src = captions[:, :captions.size()[1] - 1]
            captions_tgt = captions[:, 1:]
            captions_src = captions_src.to(device)
            captions_tgt = captions_tgt.to(device)
            lengths = lengths - 1
            lengths = lengths.to(device)

            # Forward pass
            logp, mean, logv, z = model(images, captions_src, lengths)

            # Loss calculation
            NLL_loss, KL_loss, KL_weight = loss_fn(logp, captions_tgt, lengths,
                                                   mean, logv,
                                                   args.anneal_function,
                                                   step_for_kl_annealing,
                                                   args.k, args.x0)
            valid_loss += (NLL_loss + KL_weight * KL_loss) / args.batch_size
            if j == 2:
                break

        print("validation loss for epoch {}: {}".format(epoch + 1, valid_loss))
        print("patience is at {}".format(patience))
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            patience = 0
        else:
            patience += 1
        if patience == 5:
            print("early stopping at epoch {}".format(epoch + 1))
            break
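# --- Added sketch (not from the original file) ---
# Quick illustration of the logistic KL-annealing schedule defined above;
# the k and x0 values here are illustrative, not the script's defaults:
from scipy.special import expit

for step in (0, 1000, 2500, 5000):
    print(step, float(expit(0.0025 * (step - 2500))))
# The KL weight rises smoothly from ~0 toward 1, centered at step x0, which
# keeps the KL term from collapsing the posterior early in training.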
test_size = 10000
random_vector_for_generation = tf.random.normal(
    shape=[num_examples_to_generate, latent_dim])

classifier = Classifier(shape=(28, 28, 1))
classifier_path = checkpoint_path = "./checkpoints/classifier"
cls = tf.train.Checkpoint(classifier=classifier)
cls_manager = tf.train.CheckpointManager(cls, classifier_path, max_to_keep=5)
inception_model = Inception_score()
if cls_manager.latest_checkpoint:
    cls.restore(cls_manager.latest_checkpoint)
    print('classifier checkpoint restored!!')

for i in range(10, 0, -1):
    epochs = 0
    model = CVAE(latent_dim=latent_dim, beta=3)
    sample_size = i * 100
    train_size = sample_size * 10
    train_images = divide_dataset(train_set, train_labels, sample_size)
    # train_size = 10000
    # train_images = train_set
    batch_size = 32
    train_dataset = (tf.data.Dataset.from_tensor_slices(
        train_images).shuffle(train_size).batch(batch_size))
    test_dataset = (tf.data.Dataset.from_tensor_slices(
        test_images).shuffle(test_size).batch(batch_size))
    date = '3_4/'
    str_i = str(i)
    file_path = 'sample_test' + str_i
    start_train(epochs, model, train_dataset, test_dataset, date,
torch.backends.cudnn.benchmark = False

# fetch data
data = locate('data.get_%s' % args.dataset)(args)

# make dataloaders
train_loader, val_loader, test_loader = [
    CLDataLoader(elem, args, train=t)
    for elem, t in zip(data, [True, False, False])
]

model = ResNet18(args.n_classes, nf=20,
                 input_size=args.input_size).to(args.device)
opt = torch.optim.SGD(model.parameters(), lr=0.1)

gen = CVAE(20, args).cuda()  # this is actually an autoencoder
opt_gen = torch.optim.Adam(gen.parameters())

# build buffer
if args.store_latents:
    buffer = Buffer(args, input_size=(20 * 4 * 4, ))
else:
    buffer = Buffer(args)
buffer.min_per_class = 0

print('multiple heads ', args.multiple_heads)
if run == 0:
    print("number of classifier parameters:",
          sum([np.prod(p.size()) for p in model.parameters()]))
    print("number of generator parameters: ",
def load_model(PATH):
    model = CVAE(2, 128)
    # Restore the weights
    model.load_weights(PATH)
    return model
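# --- Added usage note (not from the original file) ---
# Example use of load_model; the checkpoint path is hypothetical, and
# `decode` is assumed to be a method exposed by this CVAE:
# model = load_model('./checkpoints/cvae_weights')
# z = tf.random.normal([1, 128])
# x_hat = model.decode(z)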
# testing instances
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
    batch_size=args.batch_size, shuffle=False, **kwargs)

isVAE = True
# initialize the model
if args.model == "VAE" or args.model == "VAE_INC":
    model = VAE()
elif args.model == "CVAE_LB":
    isVAE = False
    model = CVAE(784, 400, 20, 10, False)
else:
    isVAE = False
    model = CVAE(784, 400, 20, 784, False)
if args.cuda:
    model.cuda()

# Reconstruction loss: MSE or binary cross-entropy
if args.loss == "MSE":
    reconstruction_function = nn.MSELoss()
else:
    reconstruction_function = nn.BCELoss()
# `size_average` is deprecated; `reduction='sum'` is the modern equivalent
reconstruction_function.size_average = False
labelled, unlabelled, validation = get_mnist(
    location="./data",
    batch_size=args.batch_size,
    labels_per_class=args.labels_per_class)
prev_loss = float('inf')

X = 784
Y = 10
Z = 20
H = 400
C = [400, 128]

if args.architecture == 'vae':
    model = VAE(X, Y, Z, H)
elif args.architecture == 'cvae':
    model = CVAE(X, Y, Z, H, C)
elif args.architecture == 'stackedvae':
    vae = VAE(X, Y, Z, H)
    vae.load_state_dict(torch.load(args.pretrained_vae))
    model = StackedVAE(X, Y, Z, H, C, vae)
elif args.architecture == 'gmvae':
    model = GMVAE(X, Y, Z, H, C)
else:
    raise ValueError('Model architecture {} is not defined'.format(
        args.architecture))

model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)


def train(epoch):
    model.train()
def train(args, train_dataloader, valid_dataloader):
    torch.manual_seed(87)
    torch.cuda.manual_seed(87)
    model = {'autoen': cc(CVAE()), 'discri': cc(Discriminator())}
    autoencoder, discriminator = model['autoen'], model['discri']
    criterion = torch.nn.MSELoss()
    optimizer = {
        'autoen_adam': torch.optim.Adam(autoencoder.parameters(), lr=args.lr),
        'autoen_sgd': torch.optim.SGD(autoencoder.parameters(), lr=args.lr),
        'discri_sgd': torch.optim.SGD(discriminator.parameters(), lr=args.lr)
    }

    ##########################################################################
    ##########          [Stage 1] train autoencoder                ##########
    for _ in range(4):  # four rounds
        avg_loss = _run_train_autoencoder(model, criterion, optimizer,
                                          train_dataloader)
    torch.save({'state_dict': autoencoder.state_dict()},
               "{}/stage_1_autoencoder_loss_{:.4f}.pt".format(
                   args.save_path, avg_loss))
    print("\t [stage 1] trained autoencoder, avg reconstruct loss:{:.4f}".format(
        avg_loss))
    print('\t----------------------------------------------------------')

    ##########################################################################
    ##########          [Stage 2] train discriminator              ##########
    for _ in range(3):  # three rounds
        avg_loss = _run_train_discriminator(model, criterion, optimizer,
                                            train_dataloader)
    torch.save({'state_dict': discriminator.state_dict()},
               "{}/stage_2_discriminat_loss_{:.4f}.pt".format(
                   args.save_path, avg_loss))
    print("\t [stage 2] trained discriminator, avg mse loss:{:.4f}".format(
        avg_loss))
    print('\t----------------------------------------------------------')

    ##########################################################################
    ##########          [Stage 3] adversarial training             ##########
    print("\t [stage 3] adversarial training ")
    for epoch in range(args.epoch):
        print(f' Epoch {epoch}')
        avg_auto_loss, avg_disc_loss = _run_train_adversarial(
            model, criterion, optimizer, train_dataloader)
        print("\t [Info] Train, avg autoenc loss:{:.4f}, avg discri loss:{:.4f}"
              .format(avg_auto_loss, avg_disc_loss))
        avg_auto_loss, avg_disc_loss = _run_eval(model, criterion,
                                                 valid_dataloader)
        print("\t [Info] Valid, avg autoenc recons loss:{:.4f}, avg discri loss:{:.4f}"
              .format(avg_auto_loss, avg_disc_loss))

        # Save every epoch. The original guard `if True or loss < best_loss:`
        # always passed (and `loss`/`best_loss` were never defined here), so
        # it has been removed.
        torch.save({'state_dict': autoencoder.state_dict()},
                   "{}/epoch_{}_autoencoder_loss_{:.4f}.pt".format(
                       args.save_path, epoch, avg_auto_loss))
        torch.save({'state_dict': discriminator.state_dict()},
                   "{}/epoch_{}_discriminat_loss_{:.4f}.pt".format(
                       args.save_path, epoch, avg_disc_loss))
        print(f'\t [Info] save weights at {args.save_path}/epoch_{epoch}_...')
        print('\t----------------------------------------------------------')
def main():
    args = parse_args()
    processeddata_dir = \
        "../Hanover_Dataset/HannoverDataset/processed_data/juncs_trips_%02.0f.npy" % args.min_trips
    rawdata_file = "../Hanover_Dataset/HannoverDataset"

    # Process and save the data
    if args.process_data:
        junctTrajs_dirs = sorted(
            glob.glob(os.path.join(rawdata_file, "junctTrajs/*.csv")))
        junctions_dir = os.path.join(rawdata_file, "junctions.csv")
        juncs_trips = preprocess(junctTrajs_dirs,
                                 junctions_dir,
                                 min_trips=args.min_trips,
                                 upper_threshold=args.upper_threshold,
                                 lower_threshold=args.lower_threshold,
                                 window_size=args.window_size)
        np.save(processeddata_dir, juncs_trips)
    else:
        juncs_trips = np.load(processeddata_dir)

    # Load the sequence data using the sliding-window scheme
    data_loader = Load_data(juncs_trips,
                            window_size=args.window_size,
                            stride=args.stride)

    # Load the sequence data and get the data index
    data = [sequence for sequence in data_loader.sliding_window()]
    data = np.reshape(data, (-1, 19))  # data index + features = 10 + 9 = 19

    # Note: due to the class imbalance,
    # merge tram_rails (-1) and yield_sign (1) into priority_sign (2)
    if args.num_classes == 3:
        data[data[:, 2] == -1, 2] = 2
        data[data[:, 2] == 1, 2] = 2
        # # Filter out -1 and 1
        # data = data[data[:, 2] != -1, :]
        # data = data[data[:, 2] != 1, :]

    # New target classes:
    #   uncontrolled: 0,
    #   traffic_light: 1,
    #   tram_rails/yield_sign/priority_sign: 2
    data[data[:, 2] == 4, 2] = 1
    # Filter out 3:"stop S." and 5:"roundabout"
    data = data[data[:, 2] != 3, :]
    data = data[data[:, 2] != 5, :]

    # Normalize the features
    data[:, 10:] = normalization(data[:, 10:])

    # Get the class label
    label = data[:, 2].astype(int)
    assert args.num_classes == len(
        np.unique(label)), "The number of classes is not correct"
    _label = np.eye(args.num_classes)[label].reshape(-1, args.window_size,
                                                     args.num_classes)

    # Question: how to do the data partitioning
    data = np.reshape(data, (-1, args.window_size, 19))
    print(data.shape)
    train_val_split = data_partition(data, args)

    # the last step of the sliding window
    train_data_index = data[train_val_split, -1, :10]
    # Feature columns:
    # 10/0: junc_utm_to_center,
    # 11/1: utm_east,
    # 12/2: utm_north,
    # 13/3: utm_east_speed,
    # 14/4: utm_east_speed,
    # 15/5: speed_1,
    # 16/6: speed_2, (old table speed)
    # 17/7: delta_time
    # 18/8: angle
    # =============================================================================
    # train_x = data[train_val_split, :, 10:19]
    # train_x = np.concatenate((train_x[:, :, 0:6], train_x[:, :, 7:8]), axis=2)  # skip the old speed
    # train_y = _label[train_val_split, -1, :]  # the last step of the sliding window
    #
    # val_data_index = data[~train_val_split, -1, :10]  # the last step of the sliding window
    # val_x = data[~train_val_split, :, 10:19]
    # val_x = np.concatenate((val_x[:, :, 0:6], val_x[:, :, 7:8]), axis=2)
    # val_y = _label[~train_val_split, -1, :]  # the last step of the sliding window
    # =============================================================================

    # ToDo, feature/ablation: remove delta_t
    train_x = data[train_val_split, :, 10:19]
    train_x = np.concatenate((train_x[:, :, 0:5], train_x[:, :, 7:8]), axis=2)
    # train_x = train_x[:, :, 2:5]
    train_y = _label[train_val_split, -1, :]

    val_data_index = data[~train_val_split, -1, :10]
    val_x = data[~train_val_split, :, 10:19]
    val_x = np.concatenate((val_x[:, :, 0:5], val_x[:, :, 7:8]), axis=2)
    # val_x = val_x[:, :, 2:5]
    val_y = _label[~train_val_split, -1, :]

    print(np.unique(np.argmax(val_y.reshape(-1, args.num_classes), axis=1),
                    return_counts=True))
    print("train_data_index", train_data_index.shape)
    print("train_x", train_x.shape)
print("train_y", train_y.shape) print("val_data_index", val_data_index.shape) print("val_x", val_x.shape) print("val_y", val_y.shape) ########################################################################## ## START THE CLASSIFICATION TASK # Define the callback and early stop if not os.path.exists("../models"): os.mkdir("../models") timestr = time.strftime("%Y%m%d-%H%M%S") filepath = "../models/cvae_%0.f_%s.hdf5" % (args.epochs, timestr) ## Eraly stop earlystop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=args.patience) checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=True, mode='min') callbacks_list = [earlystop, checkpoint] # # Instantiate the model cvae = CVAE(args) # Contruct the cave model train = cvae.training() train.summary() # # Start training phase if args.train_mode: # train.load_weights("../models/cvae_500_20201008-213111_03_01_90.hdf5") print("Start training the model...") train.fit(x=[train_x, train_y], y=train_y, shuffle=True, epochs=args.epochs, batch_size=args.batch_size, verbose=1, callbacks=callbacks_list, validation_data=([val_x, val_y], val_y)) train.load_weights(filepath) else: print('Run pretrained model') # train.load_weights("../models/cvae_200_20200709-211305.hdf5") train.load_weights("../models/cvae_500_20201008-213111_03_01_90.hdf5") # # Start inference phase x_encoder = cvae.X_encoder() decoder = cvae.Decoder() x_encoder.summary() decoder.summary() x_latent = x_encoder.predict(val_x, batch_size=args.batch_size) y_primes = [] for i, x_ in enumerate(x_latent): # sampling z from a normal distribution x_ = np.reshape(x_, [1, -1]) z_sample = np.random.rand(1, args.z_dim) y_p = decoder.predict(np.column_stack([z_sample, x_])) y_primes.append(y_p) y_primes = np.reshape(y_primes, (-1, args.num_classes)) ## Evaluation print("Prediction for each sliding window...") target_names = ['uc', 'tl', 'ps'] eva = Evaluation(val_y.reshape(-1, args.num_classes), y_primes, target_names) confusion_matrix = eva.cf_matrix() classification_report = eva.report() # Sum up the prediction for each trip and each junction print("Prediction for each arm...") junc_classifier = Junction_class(val_data_index, val_y, y_primes) arm_gt, arm_pd = junc_classifier.avg_classfier() arm_eva = Evaluation(arm_gt, arm_pd[:, 0], target_names, arg=False) arm_confusion_matrix = arm_eva.cf_matrix() arm_classification_report = arm_eva.report()
        output_sentence = ''.join(output_words)
        # print('<', output_sentence)
        # print('')
        total_bleu_score += bleu_score(pair[1], output_sentence)
    _bleu_score = total_bleu_score / len(pairs)
    # print(f"Bleu Score: {_bleu_score}")
    return _bleu_score


def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        idx = np.random.randint(0, 4)
        print('>', pair[idx])
        print('=', pair[idx])
        output_words = evaluate(encoder, decoder, idx, pair[idx])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')


def save_checkpoint(state, filename):
    torch.save(state, filename)


hidden_size = 256
latent_size = 32
model = CVAE(input_lang.n_words, hidden_size, latent_size,
             input_lang.n_words).to(device)

trainIters(model, 100000, print_every=5000)
evaluateByTestData(model)
n_classes = len(i2int)
sos_idx = w2i['SOS']
eos_idx = w2i['EOS']
pad_idx = w2i['<pad>']
unk_idx = w2i['<unk>']

NLL = torch.nn.NLLLoss(reduction='sum', ignore_index=pad_idx)

model = CVAE(vocab_size=len(i2w),
             max_sequence_length=args.max_sequence_length,
             sos_idx=sos_idx,
             eos_idx=eos_idx,
             pad_idx=pad_idx,
             unk_idx=unk_idx,
             embedding_size=args.emb_dim,
             rnn_type=args.rnn_type,
             hidden_size=args.hidden_size,
             word_dropout=args.word_dropout,
             embedding_dropout=args.embedding_dropout,
             z_size=args.latent_size,
             n_classes=n_classes,
             num_layers=args.num_layers,
             bidirectional=args.bidirectional,
             temperature=args.temperature)

if args.load_model is not None:
    state_dict = torch.load(args.load_model)
    print(state_dict['embedding.weight'].size(),
          model.embedding.weight.size())
    if state_dict['embedding.weight'].size(0) != model.embedding.weight.size(0):
        # vocab changed
        state_dict['embedding.weight'] = vocab.vectors
def train(train_A_dir, train_B_dir, model_dir, model_name, random_seed,
          val_A_dir, val_B_dir, output_dir, tensorboard_dir, load_path,
          gen_eval=True):
    np.random.seed(random_seed)

    # For now, copy hyperparams used in the CycleGAN
    num_epochs = 100000
    mini_batch_size = 1  # mini_batch_size = 1 is better
    learning_rate = 0.0002
    learning_rate_decay = learning_rate / 200000
    sampling_rate = 16000
    num_mcep = 24
    frame_period = 5.0
    n_frames = 128
    lambda_cycle = 10
    lambda_identity = 5
    device = 'cuda'

    # Use the same pre-processing as the CycleGAN
    print("Begin Preprocessing")
    wavs_A = load_wavs(wav_dir=train_A_dir, sr=sampling_rate)
    wavs_B = load_wavs(wav_dir=train_B_dir, sr=sampling_rate)
    print("Finished Loading")

    f0s_A, timeaxes_A, sps_A, aps_A, coded_sps_A = world_encode_data(
        wavs=wavs_A, fs=sampling_rate, frame_period=frame_period,
        coded_dim=num_mcep)
    f0s_B, timeaxes_B, sps_B, aps_B, coded_sps_B = world_encode_data(
        wavs=wavs_B, fs=sampling_rate, frame_period=frame_period,
        coded_dim=num_mcep)
    print("Finished Encoding")

    log_f0s_mean_A, log_f0s_std_A = logf0_statistics(f0s_A)
    log_f0s_mean_B, log_f0s_std_B = logf0_statistics(f0s_B)
    print('Log Pitch A')
    print('Mean: %f, Std: %f' % (log_f0s_mean_A, log_f0s_std_A))
    print('Log Pitch B')
    print('Mean: %f, Std: %f' % (log_f0s_mean_B, log_f0s_std_B))

    coded_sps_A_transposed = transpose_in_list(lst=coded_sps_A)
    coded_sps_B_transposed = transpose_in_list(lst=coded_sps_B)
    coded_sps_A_norm, coded_sps_A_mean, coded_sps_A_std = \
        coded_sps_normalization_fit_transoform(coded_sps=coded_sps_A_transposed)
    print("Input data fixed.")
    coded_sps_B_norm, coded_sps_B_mean, coded_sps_B_std = \
        coded_sps_normalization_fit_transoform(coded_sps=coded_sps_B_transposed)

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    np.savez(os.path.join(model_dir, 'logf0s_normalization.npz'),
             mean_A=log_f0s_mean_A, std_A=log_f0s_std_A,
             mean_B=log_f0s_mean_B, std_B=log_f0s_std_B)
    np.savez(os.path.join(model_dir, 'mcep_normalization.npz'),
             mean_A=coded_sps_A_mean, std_A=coded_sps_A_std,
             mean_B=coded_sps_B_mean, std_B=coded_sps_B_std)

    if val_A_dir is not None:
        validation_A_output_dir = os.path.join(output_dir, 'converted_A')
        if not os.path.exists(validation_A_output_dir):
            os.makedirs(validation_A_output_dir)
    if val_B_dir is not None:
        validation_B_output_dir = os.path.join(output_dir, 'converted_B')
        if not os.path.exists(validation_B_output_dir):
            os.makedirs(validation_B_output_dir)
    print("End Preprocessing")

    if load_path is not None:
        model = CVAE(num_mcep, 128, num_mcep, 2)
        model.load_state_dict(torch.load(load_path))
        model.eval()
        if device == 'cuda':
            model.cuda()
        print("Loaded Model from path %s" % load_path)
        if val_A_dir is not None and gen_eval:
            print("Generating Evaluation Data")
            for file in os.listdir(val_A_dir):
                filepath = os.path.join(val_A_dir, file)
                print("Converting {0} from Class 0 to Class 1".format(filepath))
                wav, _ = librosa.load(filepath, sr=sampling_rate, mono=True)
                wav = wav_padding(wav=wav, sr=sampling_rate,
                                  frame_period=frame_period, multiple=4)
                f0, timeaxis, sp, ap = world_decompose(
                    wav=wav, fs=sampling_rate, frame_period=frame_period)
                f0_converted = pitch_conversion(
                    f0=f0,
                    mean_log_src=log_f0s_mean_A,
                    std_log_src=log_f0s_std_A,
                    mean_log_target=log_f0s_mean_B,
                    std_log_target=log_f0s_std_B)
                coded_sp = world_encode_spectral_envelop(
                    sp=sp, fs=sampling_rate, dim=num_mcep)
                coded_sp_transposed = coded_sp.T
                coded_sp_norm = (coded_sp_transposed -
                                 coded_sps_A_mean) / coded_sps_A_std
                coded_sp_converted_norm, _, _ = model.convert(
                    np.array([coded_sp_norm]), 0, 1, device)
                coded_sp_converted_norm = coded_sp_converted_norm.cpu().numpy()
                coded_sp_converted_norm = np.squeeze(coded_sp_converted_norm)
                coded_sp_converted = (coded_sp_converted_norm *
                                      coded_sps_B_std + coded_sps_B_mean)
                coded_sp_converted = coded_sp_converted.T
                coded_sp_converted = np.ascontiguousarray(coded_sp_converted)
                decoded_sp_converted = world_decode_spectral_envelop(
                    coded_sp=coded_sp_converted, fs=sampling_rate)
                wav_transformed = world_speech_synthesis(
                    f0=f0_converted,
                    decoded_sp=decoded_sp_converted,
                    ap=ap,
                    fs=sampling_rate,
                    frame_period=frame_period)
                librosa.output.write_wav(
                    os.path.join(validation_A_output_dir,
                                 'eval_' + os.path.basename(file)),
                    wav_transformed, sampling_rate)
        exit(0)

    print("Begin Training")
    model = CVAE(num_mcep, 128, num_mcep, 2)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    writer = SummaryWriter(tensorboard_dir)
    if device == 'cuda':
        model.cuda()

    for epoch in tqdm(range(num_epochs)):
        dataset_A, dataset_B = sample_train_data(dataset_A=coded_sps_A_norm,
                                                 dataset_B=coded_sps_B_norm,
                                                 n_frames=n_frames)
        dataset_A = torch.tensor(dataset_A).to(torch.float)
        dataset_B = torch.tensor(dataset_B).to(torch.float)
        n_samples, input_dim, depth = dataset_A.shape

        # One-hot class labels broadcast over the time axis
        y_A = F.one_hot(torch.zeros(depth).to(torch.int64),
                        num_classes=2).to(torch.float).T
        y_B = F.one_hot(torch.ones(depth).to(torch.int64),
                        num_classes=2).to(torch.float).T
        (y_A, y_B) = (y_A.reshape((1, 2, depth)), y_B.reshape((1, 2, depth)))
        y_A = torch.cat([y_A] * n_samples)
        y_B = torch.cat([y_B] * n_samples)
        # dataset_A = torch.cat((dataset_A, y_A), axis=1)
        # dataset_B = torch.cat((dataset_B, y_B), axis=1)
        X = torch.cat((dataset_A, dataset_B)).to(device)
        Y = torch.cat((y_A, y_B)).to(device)

        # out, z_mu, z_var = model(dataset_A, y_A)
        # rec_loss = F.binary_cross_entropy(out, dataset_A, size_average=False)
        # kl_diver = -0.5 * torch.sum(1 + z_var - z_mu.pow(2) - z_var.exp())
        out, z_mu, z_var = model(X, Y)
        # `size_average=False` is deprecated; reduction='sum' is the equivalent
        rec_loss = F.binary_cross_entropy(out, X, reduction='sum')
        kl_diver = -0.5 * torch.sum(1 + z_var - z_mu.pow(2) - z_var.exp())
        loss = rec_loss + kl_diver

        writer.add_scalar('Reconstruction Loss', rec_loss, epoch)
        writer.add_scalar('KL-Divergence', kl_diver, epoch)
        writer.add_scalar('Total Loss', loss, epoch)
        # print("loss = {0} || rec = {1} || kl = {2}".format(loss, rec_loss, kl_diver))

        optimizer.zero_grad()  # was missing: without it, gradients accumulate across epochs
        loss.backward()
        optimizer.step()

        if val_A_dir is not None:
            if epoch % 1000 == 0:
                print('Generating Validation Data...')
                for file in os.listdir(val_A_dir):
                    filepath = os.path.join(val_A_dir, file)
                    print("Converting {0} from Class 0 to Class 1".format(filepath))
                    wav, _ = librosa.load(filepath, sr=sampling_rate, mono=True)
                    wav = wav_padding(wav=wav, sr=sampling_rate,
                                      frame_period=frame_period, multiple=4)
                    f0, timeaxis, sp, ap = world_decompose(
                        wav=wav, fs=sampling_rate, frame_period=frame_period)
                    f0_converted = pitch_conversion(
                        f0=f0,
                        mean_log_src=log_f0s_mean_A,
                        std_log_src=log_f0s_std_A,
                        mean_log_target=log_f0s_mean_B,
                        std_log_target=log_f0s_std_B)
                    coded_sp = world_encode_spectral_envelop(
                        sp=sp, fs=sampling_rate, dim=num_mcep)
                    coded_sp_transposed = coded_sp.T
                    coded_sp_norm = (coded_sp_transposed -
                                     coded_sps_A_mean) / coded_sps_A_std
                    coded_sp_converted_norm, _, _ = model.convert(
                        np.array([coded_sp_norm]), 0, 1, device)
                    coded_sp_converted_norm = coded_sp_converted_norm.cpu().numpy()
                    coded_sp_converted_norm = np.squeeze(coded_sp_converted_norm)
                    coded_sp_converted = (coded_sp_converted_norm *
                                          coded_sps_B_std + coded_sps_B_mean)
                    coded_sp_converted = coded_sp_converted.T
                    coded_sp_converted = np.ascontiguousarray(coded_sp_converted)
                    decoded_sp_converted = world_decode_spectral_envelop(
                        coded_sp=coded_sp_converted, fs=sampling_rate)
                    wav_transformed = world_speech_synthesis(
                        f0=f0_converted,
                        decoded_sp=decoded_sp_converted,
                        ap=ap,
                        fs=sampling_rate,
                        frame_period=frame_period)
                    librosa.output.write_wav(
                        os.path.join(validation_A_output_dir,
                                     str(epoch) + '_' + os.path.basename(file)),
                        wav_transformed, sampling_rate)
                    break

        if epoch % 1000 == 0:
            print('Saving Checkpoint')
            filepath = os.path.join(model_dir, model_name)
            if not os.path.exists(filepath):
                os.makedirs(filepath)
            torch.save(model.state_dict(),
                       os.path.join(filepath, '{0}.ckpt'.format(epoch)))
def main(args):
    # Image preprocessing
    transform = transforms.Compose([
        transforms.Resize((224, 224), Image.LANCZOS),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    pad_idx = vocab.word2idx['<pad>']
    sos_idx = vocab.word2idx['<start>']
    eos_idx = vocab.word2idx['<end>']
    unk_idx = vocab.word2idx['<unk>']

    # Build the models
    model = CVAE(vocab_size=len(vocab),
                 embedding_size=args.embedding_size,
                 rnn_type=args.rnn_type,
                 hidden_size=args.hidden_size,
                 word_dropout=args.word_dropout,
                 embedding_dropout=args.embedding_dropout,
                 latent_size=args.latent_size,
                 max_sequence_length=args.max_sequence_length,
                 num_layers=args.num_layers,
                 bidirectional=args.bidirectional,
                 pad_idx=pad_idx,
                 sos_idx=sos_idx,
                 eos_idx=eos_idx,
                 unk_idx=unk_idx)

    if not os.path.exists(args.load_checkpoint):
        raise FileNotFoundError(args.load_checkpoint)
    model.load_state_dict(torch.load(args.load_checkpoint))
    print("Model loaded from {}".format(args.load_checkpoint))
    model.to(device)
    model.eval()

    # Build data loader
    train_data_loader, valid_data_loader = get_loader(
        args.train_image_dir, args.val_image_dir, args.train_caption_path,
        args.val_caption_path, vocab, args.batch_size,
        shuffle=True, num_workers=args.num_workers)

    f1 = open('{}/results/generated_captions.txt'.format(dataset_root_dir), 'w')
    f2 = open('{}/results/ground_truth_captions.txt'.format(dataset_root_dir), 'w')

    for i, (images, captions, lengths) in enumerate(valid_data_loader):
        images = images.to(device)
        sampled_ids, z = model.inference(n=args.batch_size, c=images)
        sampled_ids_batches = sampled_ids.cpu().numpy()  # (batch_size, max_seq_length)
        captions = captions.cpu().numpy()

        # Convert word_ids to words
        for j, sampled_ids in enumerate(sampled_ids_batches):
            sampled_caption = []
            for word_id in sampled_ids:
                word = vocab.idx2word[word_id]
                sampled_caption.append(word)
                if word == '<end>':
                    break
            generated_sentence = ' '.join(sampled_caption)
            generated_sentence = generated_sentence.rstrip()
            generated_sentence = generated_sentence.replace("\n", "")
            generated_sentence = "{0}\n".format(generated_sentence)
            if j == 0:
                print("RE: {}".format(generated_sentence))
            f1.write(generated_sentence)

        for g, ground_truth_ids in enumerate(captions):
            ground_truth_caption = []
            for word_id in ground_truth_ids:
                word = vocab.idx2word[word_id]
                ground_truth_caption.append(word)
                if word == '<end>':
                    break
            ground_truth_sentence = ' '.join(ground_truth_caption)
            ground_truth_sentence = ground_truth_sentence.rstrip()
            ground_truth_sentence = ground_truth_sentence.replace("\n", "")
            ground_truth_sentence = "{0}\n".format(ground_truth_sentence)
            if g == 0:
                print("GT: {}".format(ground_truth_sentence))
            f2.write(ground_truth_sentence)

        if i % 10 == 0:
            print("This is the {0}th batch".format(i))

    f1.close()
    f2.close()
def train(run_id=1):
    model = CVAE(run_id=run_id)
def main():
    config = Settings(model='GPT',
                      model_name='9.23_dropout0.1_GPT',
                      resume='best.pth')
    dataset_path = os.path.join(os.getcwd(), config.path)
    dataset_filename = config.test_file
    device = config.device
    model_dir = os.path.join(config.exp_dir, config.model_name)

    with open(os.path.join(dataset_path, dataset_filename), 'rb') as f:
        dataset = pickle.load(f)
    dataset = LightDarkDataset(config, dataset)
    data, targets = collect_data(config, dataset)
    # with open(os.path.join(dataset_path, 'light_dark_sample_len15.pickle'), 'rb') as f:
    #     sample = pickle.load(f)
    # data, targets = sample['data'], sample['targets']

    if config.model == 'GPT':
        model = GPT2(config).to(device)
    elif config.model == 'RNN':
        model = RNN(config).to(device)
    elif config.model == 'CVAE':
        model = CVAE(config).to(device)

    optimizer = th.optim.AdamW(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    if config.optimizer == 'AdamW':
        scheduler = th.optim.lr_scheduler.LambdaLR(
            optimizer, lambda step: min((step + 1) / config.warmup_step, 1))
    elif config.optimizer == 'AdamWR':
        scheduler = CosineAnnealingWarmUpRestarts(optimizer=optimizer,
                                                  T_0=config.T_0,
                                                  T_mult=config.T_mult,
                                                  eta_max=config.lr_max,
                                                  T_up=config.warmup_step,
                                                  gamma=config.lr_mult)
    else:
        # |FIXME| using error?exception?logging?
        print(f'"{config.optimizer}" is not supported! You should select "AdamW" or "AdamWR".')
        return

    # load checkpoint for resuming
    if config.resume is not None:
        filename = os.path.join(model_dir, config.resume)
        if os.path.isfile(filename):
            start_epoch, best_error, model, optimizer, scheduler = load_checkpoint(
                config, filename, model, optimizer, scheduler)
            start_epoch += 1
            print("Loaded checkpoint '{}' (epoch {})".format(
                config.resume, start_epoch))
        else:
            # |FIXME| using error?exception?logging?
            print("No checkpoint found at '{}'".format(config.resume))
            return

    pred = []
    total_time = 0.
    for d in data:
        for i in range(config.num_output):
            tmp_pred, time = predict_action(config, model, d)
            pred.append(tmp_pred)
            total_time += time

    targets = np.asarray(targets).reshape(-1, 2)
    pred = np.asarray(pred).reshape(-1, 2)
    print(f'Inference time: {total_time / (config.num_input * config.num_output)}')

    plt.xlim(-7, 7)
    plt.ylim(-7, 7)
    plt.scatter(targets[:, 0], targets[:, 1], c='red')
    plt.scatter(pred[:, 0], pred[:, 1], c='blue')
    plt.show()
    # tight_layout minimizes the overlap between 2 sub-plots
    # plt.savefig('image_at_epoch_{:04d}.png'.format(epoch))
    plt.show()


epochs = 50
# set the dimensionality of the latent space to a plane for visualization later
latent_dim = 16
num_examples_to_generate = 9

# keeping the random vector constant for generation (prediction) so
# it will be easier to see the improvement.
random_vector_for_generation = tf.random.normal(
    shape=[num_examples_to_generate, latent_dim])
model = CVAE(latent_dim)

loss = tf.keras.metrics.Mean()
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

assert batch_size >= num_examples_to_generate
for test_batch in test_dataset.take(1):
    test_sample = test_batch[0:num_examples_to_generate, :, :, :]

for i in range(num_examples_to_generate):
    plt.subplot(3, 3, i + 1)
    plt.imshow(test_sample[i, :, :, :])
    plt.axis('off')

generate_and_save_images(model, 0, test_sample)

for epoch in range(1, epochs + 1):
def main():
    config = Settings()
    dataset_filename = config.dataset_file
    dataset_path = os.path.join(os.getcwd(), config.path)
    if not os.path.exists(config.exp_dir):
        os.mkdir(config.exp_dir)
    model_dir = os.path.join(config.exp_dir, config.model_name)
    logger = SummaryWriter(model_dir)

    with open(os.path.join(dataset_path, dataset_filename), 'rb') as f:
        dataset = pickle.load(f)
    print('#trajectories of dataset:', len(dataset['observation']))

    # generate dataloader
    data_loader = get_loader_multi_target(config, dataset)

    # model
    device = th.device(config.device)
    if config.model == 'GPT':
        model = GPT2(config).to(device)
    elif config.model == 'RNN':
        model = RNN(config).to(device)
    elif config.model == 'LSTM':
        model = LSTM(config).to(device)
    elif config.model == 'CVAE':
        model = CVAE(config).to(device)
    else:
        raise Exception(
            f'"{config.model}" is not supported! You should select "GPT", "RNN", "LSTM", or "CVAE".')

    # optimizer
    optimizer = th.optim.AdamW(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)

    # learning rate scheduler
    if config.optimizer == 'AdamW':
        scheduler = th.optim.lr_scheduler.LambdaLR(
            optimizer, lambda step: min((step + 1) / config.warmup_step, 1))
    elif config.optimizer == 'AdamWR':
        scheduler = CosineAnnealingWarmUpRestarts(optimizer=optimizer,
                                                  T_0=config.T_0,
                                                  T_mult=config.T_mult,
                                                  eta_max=config.lr_max,
                                                  T_up=config.warmup_step,
                                                  gamma=config.lr_mult)
    else:
        raise Exception(
            f'"{config.optimizer}" is not supported! You should select "AdamW" or "AdamWR".')

    # Metric
    if config.model == 'CVAE':
        eval_fn = NNMSE
    else:
        eval_fn = NMSE

    # Trainer & Evaluator
    evaluator = MultiTargetEvaluator(config=config,
                                     loader=data_loader,
                                     model=model,
                                     eval_fn=eval_fn)

    # load checkpoints for evaluation
    for ckpt in config.resume:
        filename = os.path.join(model_dir, ckpt)
        if os.path.isfile(filename):
            epoch, best_error, model, optimizer, scheduler = load_checkpoint(
                config, filename, model, optimizer, scheduler)
            print("Loaded checkpoint '{}' (epoch {})".format(ckpt, epoch))
        else:
            raise Exception("No checkpoint found at '{}'".format(ckpt))

        print(f'===== Evaluate {epoch} epoch =====')
        test_val = evaluator.eval(epoch)

        # Logging
        logger.add_scalar('Eval/Near-Nearest MSE', test_val, epoch)
        print(f'===== End {epoch} epoch =====')
train_dataset = (tf.data.Dataset.from_tensor_slices(
    (train_images, train_y)).shuffle(train_size).batch(args.batch_size))
test_dataset = (tf.data.Dataset.from_tensor_slices(
    (test_images, test_y)).shuffle(test_size).batch(args.batch_size))

optimizer = tf.keras.optimizers.Adam(1e-4)

# # test
# model = CVAE(latent_dim, inter_dim)
# for test_batch in test_dataset.take(1):
#     test_sample_x = test_batch[0][0:num_examples_to_generate, :, :, :]
#     test_sample_y = test_batch[1][0:num_examples_to_generate]
# generate_and_save_images(model, 0, test_sample_x, test_sample_y)

# reset
model = CVAE(args.latent_dim, args.inter_dim)

for epoch in range(args.EPOCHS):
    for train in train_dataset:
        train_step(model, train[0], train[1], optimizer)
    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=epoch)

    for test in test_dataset:
        test_step(model, test[0], test[1])
    with test_summary_writer.as_default():
        tf.summary.scalar('loss', test_loss.result(), step=epoch)

    template = 'Epoch {}, Loss: {}, Test Loss: {}'
    print(template.format(epoch + 1, train_loss.result(),
vocab_size = len(char)

num_train_data = int(len(molecules) * 0.75)
train_molecules = molecules[0:num_train_data]
test_molecules = molecules[num_train_data:-1]
train_labels = labels[0:num_train_data]
test_labels = labels[num_train_data:-1]
train_length = length[0:num_train_data]
test_length = length[num_train_data:-1]

model = CVAE(vocab_size,
             batch_size=batch_size,
             latent_size=latent_size,
             stddev=1.0,
             mean=0.0)

num_epochs = 200
save_every = 500
learning_rate = 0.0001
temperature = 1.0
min_temperature = 0.5
decay_rate = 0.95

for epoch in range(num_epochs):
    # Learning rate scheduling
    st = time.time()
    model.assign_lr(learning_rate * (decay_rate ** epoch))
    train_loss = []
    test_loss = []
raw_data = list(filter(lambda x: len(x[0]) <= opt.max_len, raw_data))
print("Load %d datapoints." % len(raw_data))
train_data, val_data, vocab = mydataset.make_dataset(raw_data, opt)
opt.max_words = min(opt.max_words, len(vocab) - 4)
train_iter, val_iter = mydataset.make_iterator((train_data, val_data), opt)

device = T.device(opt.device)
start_time = datetime.now().strftime("%Y%m%d-%H%M%S")
layout = [('model={:s}', 'cvae'),
          ('z={:02d}', opt.z_dim),
          ('time={:s}', start_time),
          ('data={:s}', opt.dataset)]
model_name = '_'.join([t.format(v) for (t, v) in layout])
writer = ut.prepare_writer(model_name, overwrite_existing=False)

model = CVAE(opt).to(device)
model.embedding.weight.data.copy_(vocab.vectors).to(device)
train_gen = mydataset.make_loader(train_iter, opt)
val_gen = mydataset.make_loader(val_iter, opt)

if opt.mode == 'train':
    train(model=model,
          train_loader=train_gen,
          val_loader=val_gen,
          tqdm=tqdm.tqdm,
          device=device,
          writer=writer,
          start_time=start_time,
          dataset_name=opt.dataset,
          iter_max=opt.max_iter,
          iter_log=opt.log_iter,
def main(**kwargs): """ Main function that trains the model 1. Retrieve arguments from kwargs 2. Prepare data 3. Train 4. Display and save first batch of training set (truth and reconstructed) after every epoch 5. If latent dimension is 2, display and save latent variable of first batch of training set after every epoch Args: dataset: Which dataset to use decoder_type: How to model the output pixels, Gaussian or Bernoulli model_sigma: In case of Gaussian decoder, whether to model the sigmas too epochs: How many epochs to train model batch_size: Size of training / testing batch lr: Learning rate latent_dim: Dimension of latent variable print_every: How often to print training progress resume_path: The path of saved model with which to resume training resume_epoch: In case of resuming, the number of epochs already done Notes: - Saves model to folder 'saved_model/' every 20 epochs and when done - Capable of training from scratch and resuming (provide saved model location to argument resume_path) - Schedules learning rate with optim.lr_scheduler.ReduceLROnPlateau : Decays learning rate by 1/10 when mean loss of all training data does not decrease for 10 epochs """ # Retrieve arguments dataset = kwargs.get('dataset', defaults['dataset']) decoder_type = kwargs.get('decoder_type', defaults['decoder_type']) if decoder_type == 'Gaussian': model_sigma = kwargs.get('model_sigma', defaults['model_sigma']) epochs = kwargs.get('epochs', defaults['epochs']) batch_size = kwargs.get('batch_size', defaults['batch_size']) lr = kwargs.get('learning_rate', defaults['learning_rate']) latent_dim = kwargs.get('latent_dim', defaults['latent_dim']) print_every = kwargs.get('print_every', defaults['print_every']) resume_path = kwargs.get('resume_path', defaults['resume_path']) resume_epoch = kwargs.get('resume_epoch', defaults['resume_epoch']) # Specify dataset transform on load if decoder_type == 'Bernoulli': trsf = transforms.Compose([ transforms.ToTensor(), transforms.Lambda(lambda x: (x >= 0.5).float()) ]) elif decoder_type == 'Gaussian': trsf = transforms.ToTensor() # Load dataset with transform if dataset == 'MNIST': train_data = datasets.MNIST(root='MNIST', train=True, transform=trsf, download=True) test_data = datasets.MNIST(root='MNIST', train=False, transform=trsf, download=True) elif dataset == 'CIFAR10': train_data = datasets.CIFAR10(root='CIFAR10', train=True, transform=trsf, download=True) test_data = datasets.CIFAR10(root='CIFAR10', train=False, transform=trsf, download=True) # Instantiate dataloader train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False) # Instantiate/Load model and optimizer if resume_path: autoencoder = torch.load(resume_path, map_location=device) optimizer = optim.Adam(autoencoder.parameters(), lr=lr) print('Loaded saved model at ' + resume_path) else: if decoder_type == 'Bernoulli': autoencoder = CVAE(latent_dim, dataset, decoder_type).to(device) else: autoencoder = CVAE(latent_dim, dataset, decoder_type, model_sigma).to(device) optimizer = optim.Adam(autoencoder.parameters(), lr=lr) # Instantiate learning rate scheduler scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', verbose=True, patience=5) # Announce current mode print( f'Start training CVAE with Gaussian encoder and {decoder_type} decoder on {dataset} dataset from epoch {resume_epoch+1}' ) # Prepare batch to display with plt first_test_batch, first_test_batch_label = 
    first_test_batch = first_test_batch.to(device)
    first_test_batch_label = first_test_batch_label.to(device)

    # Display latent variable distribution before any training
    if latent_dim == 2 and resume_epoch == 0:
        autoencoder(first_test_batch, first_test_batch_label)
        display_and_save_latent(autoencoder.z, first_test_batch_label,
                                f'-{decoder_type}-z{latent_dim}-e000')

    # Train
    autoencoder.train()
    for epoch in range(resume_epoch, epochs + resume_epoch):
        loss_hist = []
        for batch_ind, (input_data, input_label) in enumerate(train_loader):
            input_data, input_label = input_data.to(device), input_label.to(device)

            # Forward propagation
            if decoder_type == 'Bernoulli':
                z_mu, z_sigma, p = autoencoder(input_data, input_label)
            elif model_sigma:
                z_mu, z_sigma, out_mu, out_sigma = autoencoder(input_data, input_label)
            else:
                z_mu, z_sigma, out_mu = autoencoder(input_data, input_label)

            # Calculate loss
            KL_divergence_i = 0.5 * torch.sum(
                z_mu**2 + z_sigma**2 - torch.log(1e-8 + z_sigma**2) - 1., dim=1)
            if decoder_type == 'Bernoulli':
                reconstruction_loss_i = -torch.sum(
                    F.binary_cross_entropy(p, input_data, reduction='none'),
                    dim=(1, 2, 3))
            elif model_sigma:
                reconstruction_loss_i = -0.5 * torch.sum(
                    torch.log(1e-8 + 6.28 * out_sigma**2) +
                    ((input_data - out_mu)**2) / (out_sigma**2),
                    dim=(1, 2, 3))
            else:
                reconstruction_loss_i = -0.5 * torch.sum(
                    (input_data - out_mu)**2, dim=(1, 2, 3))
            ELBO_i = reconstruction_loss_i - KL_divergence_i
            loss = -torch.mean(ELBO_i)
            loss_hist.append(loss)

            # Backward propagation
            optimizer.zero_grad()
            loss.backward()

            # Update parameters
            optimizer.step()

            # Print progress
            if batch_ind % print_every == 0:
                train_log = 'Epoch {:03d}/{:03d}\tLoss: {:.6f}\t\tTrain: [{}/{} ({:.0f}%)] '.format(
                    epoch + 1, epochs + resume_epoch, loss.cpu().item(),
                    batch_ind + 1, len(train_loader),
                    100. * batch_ind / len(train_loader))
                print(train_log, end='\r')
                sys.stdout.flush()

        # Learning rate decay
        scheduler.step(sum(loss_hist) / len(loss_hist))

        # Save model every 20 epochs
        if (epoch + 1) % 20 == 0 and epoch + 1 != epochs:
            PATH = f'saved_model/{dataset}-{decoder_type}-e{epoch+1}-z{latent_dim}' \
                   + datetime.datetime.now().strftime("-%b-%d-%H-%M-%p")
            torch.save(autoencoder, PATH)
            print('\vTemporarily saved model to ' + PATH)

        # Display training result with test set
        data = f'-{decoder_type}-z{latent_dim}-e{epoch+1:03d}'
        with torch.no_grad():
            autoencoder.eval()
            if decoder_type == 'Bernoulli':
                z_mu, z_sigma, p = autoencoder(first_test_batch,
                                               first_test_batch_label)
                output = torch.bernoulli(p)
                if latent_dim == 2:
                    display_and_save_latent(autoencoder.z,
                                            first_test_batch_label, data)
                display_and_save_batch("Binarized-truth", first_test_batch,
                                       data, save=(epoch == 0))
                display_and_save_batch("Mean-reconstruction", p, data, save=True)
                display_and_save_batch("Sampled-reconstruction", output, data,
                                       save=True)
            elif model_sigma:
                z_mu, z_sigma, out_mu, out_sigma = autoencoder(
                    first_test_batch, first_test_batch_label)
                output = torch.normal(out_mu, out_sigma).clamp(0., 1.)
                if latent_dim == 2:
                    display_and_save_latent(autoencoder.z,
                                            first_test_batch_label, data)
                display_and_save_batch("Truth", first_test_batch, data,
                                       save=(epoch == 0))
                display_and_save_batch("Mean-reconstruction", out_mu, data,
                                       save=True)
                # display_and_save_batch("Sampled reconstruction", output, data, save=True)
            else:
                z_mu, z_sigma, out_mu = autoencoder(first_test_batch,
                                                    first_test_batch_label)
                output = torch.normal(out_mu,
                                      torch.ones_like(out_mu)).clamp(0., 1.)
                if latent_dim == 2:
                    display_and_save_latent(autoencoder.z,
                                            first_test_batch_label, data)
                display_and_save_batch("Truth", first_test_batch, data,
                                       save=(epoch == 0))
                display_and_save_batch("Mean-reconstruction", out_mu, data,
                                       save=True)
                # display_and_save_batch("Sampled reconstruction", output, data, save=True)
        autoencoder.train()

    # Save final model
    PATH = f'saved_model/{dataset}-{decoder_type}-e{epochs+resume_epoch}-z{latent_dim}' \
           + datetime.datetime.now().strftime("-%b-%d-%H-%M-%p")
    torch.save(autoencoder, PATH)
    print('\vSaved model to ' + PATH)
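# --- Added usage note (not from the original file) ---
# Example invocation with the keyword arguments documented in the docstring
# above (values are illustrative; omitted keys fall back to `defaults`):
if __name__ == '__main__':
    main(dataset='MNIST',
         decoder_type='Bernoulli',
         epochs=40,
         batch_size=128,
         learning_rate=1e-3,
         latent_dim=2,
         print_every=50)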
# divide data into training and test set
num_train_data = int(len(molecules_input) * 0.75)
train_molecules_input = molecules_input[0:num_train_data]
test_molecules_input = molecules_input[num_train_data:-1]
train_molecules_output = molecules_output[0:num_train_data]
test_molecules_output = molecules_output[num_train_data:-1]
train_labels = labels[0:num_train_data]
test_labels = labels[num_train_data:-1]
train_length = length[0:num_train_data]
test_length = length[num_train_data:-1]

model = CVAE(vocab_size, args)
print('Number of parameters : ',
      np.sum([np.prod(v.shape) for v in tf.trainable_variables()]))

for epoch in range(args.num_epochs):
    st = time.time()
    # Learning rate scheduling
    # model.assign_lr(learning_rate * (decay_rate ** epoch))
    train_loss = []
    test_loss = []
    st = time.time()
    for iteration in range(len(train_molecules_input) // args.batch_size):
        n = np.random.randint(len(train_molecules_input), size=args.batch_size)
        x = np.array([train_molecules_input[i] for i in n])
def main():
    config = Settings()
    # |TODO| go to Setting()
    train_filename = config.train_file
    # train_filename_1 = config.train_file_1
    # train_filename_2 = config.train_file_2
    test_filename = config.test_file
    dataset_path = os.path.join(os.getcwd(), config.path)

    if not os.path.exists(config.exp_dir):
        os.mkdir(config.exp_dir)
    model_dir = os.path.join(config.exp_dir, config.model_name)
    logger = SummaryWriter(model_dir)

    if config.data_type == 'success':
        # with open(os.path.join(dataset_path, train_filename), 'rb') as f:
        #     train_dataset = pickle.load(f)
        # with open(os.path.join(dataset_path, test_filename), 'rb') as f:
        #     test_dataset = pickle.load(f)
        dataset = glob.glob(f'{dataset_path}/{train_filename}/*.pickle')
        # test_dataset = glob.glob(f'{dataset_path}/{test_filename}/*.pickle')
        # train_dataset = dataset[:1500000]
        # test_dataset = dataset[-200000:]
        train_dataset = dataset[:-20000]
        test_dataset = dataset[-20000:]

        print('#trajectories of train_dataset:', len(train_dataset))
        print('#trajectories of test_dataset:', len(test_dataset))

    elif config.data_type == 'mcts':
        dataset = glob.glob(f'{dataset_path}/{train_filename}/*.pickle')
        train_dataset = dataset[:-20000]
        test_dataset = dataset[-20000:]
        # train_dataset = glob.glob(f'{dataset_path}/{train_filename}/*.pickle')
        # test_dataset = glob.glob(f'{dataset_path}/{test_filename}/*.pickle')

        if config.filter:
            filtered_data_train = []
            filtered_data_test = []
            total_reward_filt = []
            total_reward_not_filt = []
            avg_total_reward_not_filt = 0
            avg_total_reward_filt = 0
            for data in train_dataset:
                with open(data, 'rb') as f:
                    traj = pickle.load(f)
                avg_total_reward_not_filt += traj[-1]
                total_reward_not_filt.append(traj[-1])
                if traj[-1] > config.filter:
                    filtered_data_train.append(data)
                    avg_total_reward_filt += traj[-1]
                    total_reward_filt.append(traj[-1])

            for data in test_dataset:
                with open(data, 'rb') as f:
                    traj = pickle.load(f)
                if traj[-1] > config.filter:
                    filtered_data_test.append(data)

            total_reward_not_filt_std = np.std(np.asarray(total_reward_not_filt))
            total_reward_filt_std = np.std(np.asarray(total_reward_filt))
            print('Average of total reward(not filtered):',
                  avg_total_reward_not_filt / len(train_dataset))
            print('std of total reward(not filtered):', total_reward_not_filt_std)
            print('Average of total reward(filtered):',
                  avg_total_reward_filt / len(filtered_data_train))
            print('std of total reward(filtered):', total_reward_filt_std)

            train_dataset = filtered_data_train
            test_dataset = filtered_data_test

        print('#trajectories of train_dataset:', len(train_dataset))
        print('#trajectories of test_dataset:', len(test_dataset))

    # # For mixed dataset
    # train_dataset_1 = glob.glob(f'{dataset_path}/{train_filename_1}/*.pickle')
    # dataset_2 = glob.glob(f'{dataset_path}/{train_filename_2}/*.pickle')
    # train_dataset_2 = dataset_2[:100000]
    # test_dataset = dataset_2[100000:]
    # if config.filter:
    #     filtered_data_train = []
    #     filtered_data_test = []
    #     total_reward_filt = []
    #     total_reward_not_filt = []
    #     avg_total_reward_not_filt = 0
    #     avg_total_reward_filt = 0
    #     for data in train_dataset_2:
    #         with open(data, 'rb') as f:
    #             traj = pickle.load(f)
    #         avg_total_reward_not_filt += traj[-1]
    #         total_reward_not_filt.append(traj[-1])
    #         if traj[-1] > config.filter:
    #             filtered_data_train.append(data)
    #             avg_total_reward_filt += traj[-1]
    #             total_reward_filt.append(traj[-1])
    #     for data in test_dataset:
    #         with open(data, 'rb') as f:
    #             traj = pickle.load(f)
    #         if traj[-1] > config.filter:
    #             filtered_data_test.append(data)
    #     total_reward_not_filt_std = np.std(np.asarray(total_reward_not_filt))
    #     total_reward_filt_std = np.std(np.asarray(total_reward_filt))
    #     print('Average of total reward(not filtered):', avg_total_reward_not_filt/len(train_dataset_2))
    #     print('std of total reward(not filtered):', total_reward_not_filt_std)
    #     print('Average of total reward(filtered):', avg_total_reward_filt/len(filtered_data_train))
    #     print('std of total reward(filtered):', total_reward_filt_std)
    #     train_dataset = train_dataset_1 + filtered_data_train
    #     test_dataset = filtered_data_test
    # print('#trajectories of train_dataset:', len(train_dataset))
    # print('#trajectories of test_dataset:', len(test_dataset))

    # generate dataloaders
    train_loader = get_loader(config, train_dataset)
    test_loader = get_loader(config, test_dataset)

    # model
    device = th.device(config.device)
    if config.model == 'GPT':
        model = GPT2(config).to(device)
    elif config.model == 'RNN':
        model = RNN(config).to(device)
    elif config.model == 'LSTM':
        model = LSTM(config).to(device)
    elif config.model == 'CVAE' or config.model == 'PolicyValueNet':
        model = CVAE(config).to(device)
    elif config.model == 'ValueNet':
        model = ValueNet(config).to(device)
    else:
        raise Exception(
            f'"{config.model}" is not supported! You should select "GPT", "RNN", "LSTM", "CVAE", "ValueNet", or "PolicyValueNet".')

    # optimizer
    optimizer = th.optim.AdamW(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)

    # learning rate scheduler
    if config.optimizer == 'AdamW':
        scheduler = th.optim.lr_scheduler.LambdaLR(
            optimizer, lambda step: min((step + 1) / config.warmup_step, 1))
    elif config.optimizer == 'AdamWR':
        scheduler = CosineAnnealingWarmUpRestarts(optimizer=optimizer,
                                                  T_0=config.T_0,
                                                  T_mult=config.T_mult,
                                                  eta_max=config.lr_max,
                                                  T_up=config.warmup_step,
                                                  gamma=config.lr_mult)
    else:
        raise Exception(
            f'"{config.optimizer}" is not supported! You should select "AdamW" or "AdamWR".')

    # Metric
    # |TODO| implement Chamfer distance
    if config.model == 'CVAE':
        loss_fn = ELBOLoss(config)
        eval_fn = ELBOLoss(config)
    elif config.model == 'ValueNet':
        loss_fn = RegressionLossValue(config)
        eval_fn = RegressionLossValue(config)
    elif config.model == 'PolicyValueNet':
        loss_fn = None
        eval_fn = None
    else:
        loss_fn = RegressionLossPolicy(config)
        eval_fn = RegressionLossPolicy(config)

    # Trainer & Evaluator
    trainer = Trainer(config=config,
                      loader=train_loader,
                      model=model,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      loss_fn=loss_fn,
                      eval_fn=eval_fn)
    evaluator = Evaluator(config=config,
                          loader=test_loader,
                          model=model,
                          eval_fn=eval_fn)

    # save configuration
    config.save(model_dir + '/config.yaml')

    # Logging model graph
    dummy = next(iter(test_loader))
    for k in dummy:
        dummy[k].to(device).detach()
    logger.add_graph(ModelAsTuple(config, model), dummy)

    start_epoch = 1
    best_error = 10000.
    # load checkpoint for resuming
    if config.resume is not None:
        filename = os.path.join(model_dir, config.resume)
        if os.path.isfile(filename):
            start_epoch, best_error, model, optimizer, scheduler = load_checkpoint(
                config, filename, model, optimizer, scheduler)
            start_epoch += 1
            print("Loaded checkpoint '{}' (epoch {})".format(
                config.resume, start_epoch))
        else:
            raise Exception("No checkpoint found at '{}'".format(config.resume))

    # load checkpoint for pre-trained weights
    if config.pre_trained is not None:
        pre_trained_path = os.path.join(config.exp_dir, config.pre_trained)
        if os.path.isfile(pre_trained_path):
            start_epoch, best_error, model, optimizer, scheduler = load_checkpoint(
                config, pre_trained_path, model, optimizer, scheduler)
            start_epoch = 1
            print("Loaded checkpoint '{}'".format(config.pre_trained))
        else:
            # was '.format(config.resume)', which reported the wrong path
            raise Exception("No checkpoint found at '{}'".format(config.pre_trained))

    for epoch in range(start_epoch, config.epochs + 1):
        print(f'===== Start {epoch} epoch =====')

        # Training one epoch
        print("Training...")
        train_loss, train_val = trainer.train(epoch)

        # Logging
        if config.model == 'CVAE':
            logger.add_scalar('Loss(total)/train', train_loss['total'], epoch)
            logger.add_scalar('Loss(Reconstruction)/train', train_loss['Recon'], epoch)
            logger.add_scalar('Loss(KL_divergence)/train', train_loss['KL_div'], epoch)
        elif config.model == 'ValueNet':
            logger.add_scalar('Loss/train', train_loss['total'], epoch)
        elif config.model == 'PolicyValueNet':
            logger.add_scalar('Loss(total)/train', train_loss['total'], epoch)
            logger.add_scalar('Loss(action)/train', train_loss['action'], epoch)
            logger.add_scalar('Loss(accumulated reward)/train',
                              train_loss['accumulated_reward'], epoch)
            # logger.add_scalar('Eval(action)/train', train_val['action'], epoch)
        else:
            logger.add_scalar('Loss(total)/train', train_loss['total'], epoch)
            logger.add_scalar('Loss(action)/train', train_loss['action'], epoch)
            # if config.use_reward:
            #     logger.add_scalar('Loss(reward)/train', train_loss['reward'], epoch)
            # logger.add_scalar('Eval(action)/train', train_val['action'], epoch)
            # if config.use_reward:
            #     logger.add_scalar('Eval(reward)/train', train_val['reward'], epoch)

        # |FIXME| debug for eff_grad: "RuntimeError: Boolean value of Tensor with more than one value is ambiguous"
        log_gradients(model, logger, epoch,
                      log_grad=config.log_grad,
                      log_param=config.log_para,
                      eff_grad=config.eff_grad,
                      print_num_para=config.print_num_para)

        # evaluating
        if epoch % config.test_eval_freq == 0:
            print("Validating...")
            test_val = evaluator.eval(epoch)

            # save the best model
            # |TODO| change 'action' to 'total' @ trainer.py & evaluator.py -> merge 'CVAE' & others
            if config.model == 'CVAE' or config.model == 'ValueNet' or config.model == 'PolicyValueNet':
                if test_val['total'] < best_error:
                    best_error = test_val['total']
                    save_checkpoint('Saving the best model!',
                                    os.path.join(model_dir, 'best.pth'),
                                    epoch, best_error, model, optimizer, scheduler)
            else:
                if test_val['action'] < best_error:
                    best_error = test_val['action']
                    save_checkpoint('Saving the best model!',
                                    os.path.join(model_dir, 'best.pth'),
                                    epoch, best_error, model, optimizer, scheduler)

            # Logging
            if config.model == 'CVAE':
                logger.add_scalar('Eval(total)/test', test_val['total'], epoch)
                logger.add_scalar('Eval(Reconstruction)/test', test_val['Recon'], epoch)
                logger.add_scalar('Eval(KL_divergence)/test', test_val['KL_div'], epoch)
            elif config.model == 'ValueNet':
                logger.add_scalar('Eval/test', test_val['total'], epoch)
            elif config.model == 'PolicyValueNet':
                logger.add_scalar('Eval(total)/test', test_val['total'], epoch)
                logger.add_scalar('Eval(action)/test', test_val['action'], epoch)
                logger.add_scalar('Eval(accumulated reward)/test',
                                  test_val['accumulated_reward'], epoch)
            else:
                logger.add_scalar('Eval(action)/test', test_val['action'], epoch)
                # if config.use_reward:
                #     logger.add_scalar('Eval(reward)/test', test_val['reward'], epoch)

        # save the model
        if epoch % config.save_freq == 0:
            save_checkpoint('Saving...',
                            os.path.join(model_dir, f'ckpt_epoch_{epoch}.pth'),
                            epoch, best_error, model, optimizer, scheduler)

        print(f'===== End {epoch} epoch =====')
    for i in range(predictions.shape[0]):
        plt.subplot(4, 4, i + 1)
        plt.imshow(predictions[i, :, :, 0], cmap='gray')
        plt.axis('off')
    file_dir = './image/' + dire
    if not os.path.exists(file_dir):
        os.makedirs(file_dir)
    plt.savefig(file_dir + '/image_at_epoch_{:04d}.png'.format(degree))
    plt.close()


def generate_images(model, data):
    fig = plt.figure(figsize=(4, 4))
    for i in range(data.shape[0]):
        plt.subplot(4, 4, i + 1)
        plt.imshow(data[i, :, :, 0], cmap='gray')
        plt.axis('off')
    plt.show()


for i in range(1, 6):
    model = CVAE(latent_dim=16, beta=i)
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint.restore("checkpoints/2_20method" + str(i) + "/ckpt-10")

    mean, logvar = model.encode(test_sample)
    r_m = np.identity(model.latent_dim)
    z = model.reparameterize(mean, logvar)

    # Rotate the first two latent dimensions by 60 degrees
    theta = np.radians(60)
    c, s = np.cos(theta), np.sin(theta)
    r_m[0, [0, 1]], r_m[1, [0, 1]] = [c, s], [-s, c]
    rota_z = matvec(tf.cast(r_m, dtype=tf.float32), z)
    phi_z = model.decode(rota_z)
    generate_and_save_images(phi_z, 1, 'test3' + "/beta_test" + str(i))
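# --- Added sketch (not from the original file) ---
# Sanity check of the latent rotation used above: the matrix acts only on
# the first two latent dimensions, and a full 360-degree rotation returns
# the original codes (up to float error):
import numpy as np

theta = np.radians(360)
c, s = np.cos(theta), np.sin(theta)
r_m = np.identity(16)
r_m[0, [0, 1]], r_m[1, [0, 1]] = [c, s], [-s, c]
z = np.random.randn(4, 16)
assert np.allclose(z @ r_m.T, z, atol=1e-6)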