def gen(args, model=None, max_len=15, top_p=True):
    device = init_device()
    corpus = utils.Corpus(args.data, args.persona_data)
    if model is None:
        vocab = len(corpus.dictionary)
        model = models.CVAE(vocab, args.embedding, args.hidden, args.latent)
        model.load_model()
    model = model.to(device)
    model.eval()

    generated_verses = {}
    for persona in corpus.personas:
        print("Artist {}".format(persona))
        persona_tokens = corpus.personas[persona]
        p_len = torch.tensor([len(persona_tokens)]).long().to(device)
        p = torch.tensor([persona_tokens]).long().to(device)
        # 50 verses per artist, arbitrary
        artist_verses = []
        for _ in range(50):
            generated_verse = []
            ctxt = [1]
            # 16 bars per verse
            for _ in range(16):
                print(ctxt)
                out_sequence = ["S"]
                out_tokens = []
                x_len = torch.tensor([len(ctxt)]).long().to(device)
                x = torch.tensor([ctxt]).long().to(device)
                hidden = model.infer_hidden(x, x_len, p, p_len)
                word = torch.ones([1, 1], dtype=torch.long, device=model.device())
                while out_sequence[-1] != "L" and len(out_sequence) < max_len:
                    word = model.embedding(word)
                    outputs, hidden = model.decoder(word, hidden)
                    outputs = F.log_softmax(model.out(outputs), dim=-1).squeeze()
                    if top_p:
                        outputs = top_p_filtering(outputs).unsqueeze(0)
                    else:
                        outputs = outputs.unsqueeze(0)
                    # Get a random sample from output
                    word = torch.multinomial(F.softmax(outputs, dim=-1), 1)
                    out_tokens.append(word.item())
                    out_sequence.append(corpus.dictionary.idx2word[word.item()])
                ctxt.extend(out_tokens)
                generated_verse.extend(out_sequence)
            artist_verses.append(generated_verse)
        generated_verses[persona] = artist_verses

    with open("verses.json", 'w') as verses_file:
        json.dump(generated_verses, verses_file)
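# gen() above depends on a top_p_filtering helper that is not part of this
# excerpt. The sketch below is a minimal, assumed implementation of standard
# nucleus (top-p) filtering for a 1-D logits tensor; the default threshold of
# 0.9 and the -inf masking value are assumptions, not the project's settings.
def top_p_filtering(logits, top_p=0.9, filter_value=-float("inf")):
    # Sort token scores, then keep the smallest prefix whose cumulative
    # probability exceeds top_p; every token past it is masked out.
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
    sorted_mask = cumulative_probs > top_p
    # Shift the mask right so the first token that crosses the threshold stays.
    sorted_mask[..., 1:] = sorted_mask[..., :-1].clone()
    sorted_mask[..., 0] = False
    logits[sorted_indices[sorted_mask]] = filter_value
    return logits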
def main():
    print('load model: ' + args.net_type)
    args.outf = args.outf + args.net_type + '_' + args.dataset + '/'
    model = models.ResNet50(num_c=1)
    model.cuda()
    model = nn.DataParallel(model)

    vae = models.CVAE(d=32, z=2048)
    vae = nn.DataParallel(vae)
    save_model = torch.load(args.vae_path)
    model_dict = vae.state_dict()
    state_dict = {
        k: v for k, v in save_model.items() if k in model_dict.keys()
    }
    print(state_dict.keys())
    model_dict.update(state_dict)
    vae.load_state_dict(model_dict)
    vae.cuda()
    vae.eval()

    if args.lr is None:
        args.lr = 1e-1
    if args.lr_schedule is None:
        args.lr_schedule = '20'
    if args.num_epochs is None:
        args.num_epochs = 30
    lr_drop_epochs = [
        int(epoch_str) for epoch_str in args.lr_schedule.split(',')
    ]
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=2e-4)
    criterion = nn.BCEWithLogitsLoss()

    print('load target data: ', args.dataset)
    test_clean_data = torch.load(args.outf + 'test_clean_data_%s_%s_%s.pth' %
                                 (args.net_type, args.dataset, args.adv_type))
    test_adv_data = torch.load(args.outf + 'test_adv_data_%s_%s_%s.pth' %
                               (args.net_type, args.dataset, args.adv_type))
    test_noisy_data = torch.load(args.outf + 'test_noisy_data_%s_%s_%s.pth' %
                                 (args.net_type, args.dataset, args.adv_type))
    testset = torch.cat((test_clean_data, test_adv_data, test_noisy_data))
    # Label 1 marks adversarial samples; clean and noisy samples are label 0.
    testlabel = torch.cat((torch.zeros(test_clean_data.size(0)),
                           torch.ones(test_adv_data.size(0)),
                           torch.zeros(test_noisy_data.size(0))))

    def test(testset, testlabel, model, outf):
        total = 0
        output = []
        with torch.no_grad():
            for data_index in range(
                    int(np.floor(testset.size(0) / args.batch_size))):
                inputs = testset[total:total + args.batch_size].cuda()
                targets = testlabel[total:total + args.batch_size].cuda()
                if use_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                inputs, targets = Variable(inputs), Variable(targets)
                # The detector sees the VAE reconstruction residual, not the raw image.
                outputs = model(inputs - vae(inputs))
                outputs = torch.sigmoid(outputs).squeeze(dim=1)
                output.append(outputs)
                total += args.batch_size
        output = torch.cat(output)
        num_samples = output.shape[0]
        l1 = open('%s/confidence_TMP_In.txt' % outf, 'w')
        l2 = open('%s/confidence_TMP_Out.txt' % outf, 'w')
        for i in range(num_samples):
            # .item() writes plain floats rather than tensor reprs.
            if testlabel[i] == 0:
                l1.write("{}\n".format(-output[i].item()))
            else:
                l2.write("{}\n".format(-output[i].item()))
        l1.close()
        l2.close()
        results = callog.metric(outf, ['TMP'])
        mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']
        for mtype in mtypes:
            print(' {mtype:6s}'.format(mtype=mtype), end='')
        print('\n{val:6.2f}'.format(val=100. * results['TMP']['TNR']), end='')
        print(' {val:6.2f}'.format(val=100. * results['TMP']['AUROC']), end='')
        print(' {val:6.2f}'.format(val=100. * results['TMP']['DTACC']), end='')
        print(' {val:6.2f}'.format(val=100. * results['TMP']['AUIN']), end='')
        print(' {val:6.2f}\n'.format(val=100. * results['TMP']['AUOUT']),
              end='')
        wandb.log({'TNR': 100. * results['TMP']['TNR']})
        wandb.log({'AUROC': 100. * results['TMP']['AUROC']})
        wandb.log({'DTACC': 100. * results['TMP']['DTACC']})
        wandb.log({'AUIN': 100. * results['TMP']['AUIN']})
        wandb.log({'AUOUT': 100. * results['TMP']['AUOUT']})
    if args.detector_path is not None:
        model = torch.load(args.detector_path)
        model.cuda()
        model.eval()
        test(testset, testlabel, model, log_dir)
        sys.exit(0)

    print('start to train: ')
    train_clean_data = torch.load(args.outf + 'train_clean_data_%s_%s_%s.pth' %
                                  (args.net_type, args.dataset, args.adv_type))
    train_adv_data = torch.load(args.outf + 'train_adv_data_%s_%s_%s.pth' %
                                (args.net_type, args.dataset, args.adv_type))
    train_noisy_data = torch.load(args.outf + 'train_noisy_data_%s_%s_%s.pth' %
                                  (args.net_type, args.dataset, args.adv_type))
    trainset = torch.cat((train_clean_data, train_adv_data, train_noisy_data))
    trainlabel = torch.cat((torch.zeros(train_clean_data.size(0)),
                            torch.ones(train_adv_data.size(0)),
                            torch.zeros(train_noisy_data.size(0))))
    shuffle = torch.randperm(trainlabel.size(0))
    trainset = trainset[shuffle]
    trainlabel = trainlabel[shuffle]

    start_epoch, iteration = 0, 0
    for epoch in range(start_epoch, args.num_epochs):
        total = 0
        model.train()  # now we set the model to train mode
        lr = args.lr
        for lr_drop_epoch in lr_drop_epochs:
            if epoch >= lr_drop_epoch:
                lr *= 0.1
        print(f'START EPOCH {epoch:04d} (lr={lr:.0e})')
        for data_index in range(
                int(np.floor(trainset.size(0) / args.batch_size))):
            # Linear learning-rate warm-up over the first 5 epochs.
            if epoch < 5 and args.lr >= 0.1:
                lr = (iteration + 1) / (
                    5 * np.floor(trainset.size(0) / args.batch_size)) * args.lr
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            data = trainset[total:total + args.batch_size].cuda()
            label = trainlabel[total:total + args.batch_size].cuda()
            inputs = Variable(data, requires_grad=False)
            labels = Variable(label, requires_grad=False)
            total += args.batch_size
            iteration += 1
            optimizer.zero_grad()
            logits = model(inputs - vae(inputs))
            logits = logits.squeeze(dim=1)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            predicted = torch.round(torch.sigmoid(logits))
            correct = predicted.eq(labels.data).cpu().sum()
            accuracy = correct / inputs.size(0)
            wandb.log({'loss': loss.item()}, step=iteration)
            wandb.log({'accuracy': accuracy.item()}, step=iteration)
            print(f'ITER {iteration:06d}',
                  f'accuracy: {accuracy.item() * 100:5.1f}%',
                  f'loss: {loss.item():.2f}',
                  sep='\t')
        print(f'END EPOCH {epoch:04d}')

        if epoch % 10 == 0:
            print('BEGIN VALIDATION')
            model.eval()
            test(testset, testlabel, model, log_dir)
            checkpoint_fname = os.path.join(log_dir, f'{epoch:04d}.ckpt.pth')
            torch.save(model, checkpoint_fname)

    print('BEGIN VALIDATION')
    model.eval()
    test(testset, testlabel, model, log_dir)
    checkpoint_fname = os.path.join(log_dir, f'{epoch:04d}.ckpt.pth')
    torch.save(model, checkpoint_fname)
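# The TNR/AUROC/DTACC/AUIN/AUOUT numbers above come from callog.metric, whose
# implementation is not part of this excerpt. As a rough illustration only
# (not the callog code), the AUROC could be recovered from the two confidence
# files like this, assuming scikit-learn is available and that "In" samples
# (clean/noisy) are expected to score higher than "Out" samples (adversarial):
import numpy as np
from sklearn.metrics import roc_auc_score

def auroc_from_confidence_files(outf):
    in_scores = np.loadtxt('%s/confidence_TMP_In.txt' % outf)    # clean + noisy
    out_scores = np.loadtxt('%s/confidence_TMP_Out.txt' % outf)  # adversarial
    scores = np.concatenate([in_scores, out_scores])
    # Treat "In" as the positive class; its written scores (-sigmoid output)
    # should sit above the adversarial ones for a good detector.
    labels = np.concatenate([np.ones_like(in_scores), np.zeros_like(out_scores)])
    return roc_auc_score(labels, scores)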
                                          num_workers=2)
test_loader = torch.utils.data.DataLoader(testset,
                                           batch_size=100,
                                           shuffle=False,
                                           num_workers=2)

# Model
print('\n[Phase 2] : Model setup')
model = models.Wide_ResNet(28, 10, 0.3, 10)
if use_cuda:
    model.cuda()
    model = nn.DataParallel(model)
    cudnn.benchmark = True

vae = models.CVAE(d=32, z=2048)
vae = nn.DataParallel(vae)
save_model = torch.load(args.vae_path)
model_dict = vae.state_dict()
state_dict = {k: v for k, v in save_model.items() if k in model_dict.keys()}
print(state_dict.keys())
model_dict.update(state_dict)
vae.load_state_dict(model_dict)
vae.cuda()
vae.eval()

criterion = nn.CrossEntropyLoss()
if args.lr is None:
    args.lr = 1e-1
if args.lr_schedule is None:
def train(args):
    """ trains a model as specified by args """
    seed_random(args.rand_seed)
    device = init_device()
    train_log, valid_log = init_logger(log_dir=args.log_dir)

    # TODO: set up load_data functions - would be best if they returned a data loader
    corpus = utils.Corpus(args.data, args.persona_data)
    train_data = utils.load_data(corpus.train,
                                 batch_size=args.batch_size,
                                 num_workers=4)
    test_data = utils.load_data(corpus.test,
                                batch_size=args.batch_size,
                                num_workers=4)
    vocab = len(corpus.dictionary)
    model = models.CVAE(vocab, args.embedding, args.hidden, args.latent)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[100, 150],
                                                     gamma=0.1)
    if args.continue_training:
        model.load_model()
    model = model.to(device)
    num_params = sum(param.numel() for param in model.parameters())
    print("Training", model.name, "with #params:", num_params)

    loss = cvae_loss_function
    best = float("inf")
    global_step = 0
    for epoch in range(args.num_epoch):
        losses = []
        for x, x_len, p, p_len, y, y_len in train_data:
            # Now we need to make sure everything in the batch has the same size
            x, x_len = x.to(device), x_len.to(device)
            p, p_len = p.to(device), p_len.to(device)
            y, y_len = y.to(device), y_len.to(device)
            # Teacher forcing: should go from 1 to 0 over ~100k steps (after a good LM is learned)
            teach = 1 if global_step < 200_000 else .9995
            res = model(x, x_len, p, p_len, y, y_len, teach)
            pred, bow_log, r_mu, r_log_var, p_mu, p_log_var = res
            eos_tensor = torch.empty(x.shape[0], 1).to(device)
            eos_tensor.fill_(corpus.dictionary.word2idx["L"])
            gold = torch.cat([y, eos_tensor], dim=1).long()
            # KL annealing weight: ramps linearly from 0 to 1 between steps 10k and 70k
            alph = min(max(0, (global_step - 10_000) / 60_000), 1)
            pred = pred.permute(0, 2, 1)
            # Get loss, normalized by batch size
            loss_val = loss(pred, gold, bow_log, r_mu, r_log_var,
                            p_mu, p_log_var, alpha=alph)
            optimizer.zero_grad()
            loss_val.backward()
            if args.grad_clip > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
            optimizer.step()
            scheduler.step()
            global_step += 1
            losses.append(loss_val.detach().cpu().numpy())
            if train_log is not None:
                train_log.add_scalar("loss", losses[-1], global_step)

        with torch.no_grad():
            validation = eval_inference(model, corpus, test_data, valid_log,
                                        global_step)
        avg_l = np.mean(losses)
        print("epoch %-3d \t loss = %0.3f \t" % (epoch, avg_l))
        if validation < best:
            print("Saving model!")
            best = validation
            model.save_model()
    print("Finished training, best model got: {} NLL".format(best))
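# train() refers to cvae_loss_function, which is defined elsewhere in the
# project. The sketch below is one plausible formulation under the usual CVAE
# objective and the shapes visible above (pred: batch x vocab x seq_len,
# gold: batch x seq_len). The bag-of-words term, the padding index of 0, and
# the sum-then-normalize reduction are assumptions, not the project's code.
import torch
import torch.nn.functional as F

def cvae_loss_function(pred, gold, bow_log, r_mu, r_log_var, p_mu, p_log_var,
                       alpha=1.0, pad_idx=0):
    batch_size = gold.size(0)
    # Token-level reconstruction loss against the gold sequence.
    rec = F.cross_entropy(pred, gold, ignore_index=pad_idx, reduction="sum")
    # KL( q(z|x,p,y) || p(z|x,p) ) between two diagonal Gaussians,
    # annealed by alpha as in the training loop above.
    kld = 0.5 * torch.sum(
        p_log_var - r_log_var
        + (r_log_var.exp() + (r_mu - p_mu).pow(2)) / p_log_var.exp()
        - 1)
    # Bag-of-words auxiliary loss: predict every gold token from the latent,
    # ignoring word order (bow_log assumed to be batch x vocab log-probs).
    vocab = bow_log.size(-1)
    bow = F.nll_loss(
        bow_log.unsqueeze(1).expand(-1, gold.size(1), -1).reshape(-1, vocab),
        gold.reshape(-1), ignore_index=pad_idx, reduction="sum")
    return (rec + alpha * kld + bow) / batch_size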
def twod_viz(args, model=None):
    device = init_device()
    corpus = utils.Corpus(args.data, args.persona_data)
    if model is None:
        vocab = len(corpus.dictionary)
        model = models.CVAE(vocab, args.embedding, args.hidden, args.latent,
                            rnn=args.rnn)
        model.load_model()
    model = model.to(device)
    model.eval()

    artist_names = [
        "21 savage", '6ix9ine', 'dr dre', 'earl sweatshirt', 'ice cube',
        'kanye west', 'kendrick lamar', 'kid cudi', 'pusha t',
        'tyler the creator',
    ]
    artist_list = [2, 5, 23, 26, 36, 44, 46, 47, 67, 86]
    names = {}
    for name, id_ in zip(artist_names, artist_list):
        names[id_] = name

    latents = []
    labels = []
    # No gradients needed here; this also lets .numpy() be called on the prior means.
    with torch.no_grad():
        for artist in artist_list:
            persona = corpus.personas[artist]
            print("Artist {}".format(artist))
            ctxt = ['S']
            ctxt = [corpus.dictionary.word2idx[word] for word in ctxt]
            p_len = torch.tensor([len(persona)]).long().to(device)
            p = torch.tensor([persona]).long().to(device)
            x_len = torch.tensor([len(ctxt)]).long().to(device)
            x = torch.tensor([ctxt]).long().to(device)
            x_emb = model.embedding(x)
            p_emb = model.embedding(p)
            c_enc = model.contextualize(x_emb, x_len, p_emb, p_len)
            out_prior = model.priorlnorm(model.tanh(model.prior(c_enc)))
            p = model.p_mu_log_var(out_prior)
            p_mu, p_log_var = torch.split(p, model.latent_dim, dim=-1)
            latents.append(p_mu.cpu().numpy().squeeze())
            labels.append(artist)

    latents = np.stack(latents)
    means = np.mean(latents, axis=0)
    print(means)
    latents = latents - means
    print(latents)
    print(latents.shape)
    labels = np.array(labels)
    print(labels.shape)

    # cm = plt.get_cmap('gist_rainbow')
    fig = plt.figure()
    # jet = cm = plt.get_cmap('gist_rainbow')
    # cNorm = colors.Normalize(vmin=0, vmax=10)
    # scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
    for idx, cl in enumerate(np.unique(labels)):
        plt.scatter(x=latents[idx, 0] * 1000,
                    y=latents[idx, 1] * 1000,
                    label=artist_names[idx])
        # plt.text(x=latents[idx, 0]*1000,
        #          y=latents[idx, 1]*1000,
        #          s=artist_names[idx],
        #          alpha=0.9,
        #          )
    plt.legend(loc='upper left')
    plt.xlim(-2.75, 2.75)
    plt.ylim(-2.75, 2.75)
    plt.xlabel("Dim 1")
    plt.ylabel("Dim 2")
    plt.title("Artist Embeddings with IDs")
    plt.show()
    fig.savefig('my_figure.png')
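# A minimal sketch of how twod_viz might be driven standalone. Every value in
# the Namespace below is a placeholder (hypothetical paths and sizes), not the
# project's defaults; latent=2 just keeps the plotted prior means genuinely 2-D.
if __name__ == "__main__":
    from argparse import Namespace
    viz_args = Namespace(data="data/lyrics.txt",            # hypothetical corpus path
                         persona_data="data/personas.txt",  # hypothetical persona path
                         embedding=256, hidden=512, latent=2,
                         rnn="gru")                          # assumed value for args.rnn
    twod_viz(viz_args)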