# --- LTMG regulation matrix + model setup (script fragment; `args`, `scData`,
# --- `device`, `ltmgFile`, `start_time` are defined earlier in the full file) ---

# Load the LTMG (left-truncated mixture Gaussian) regulation matrix and move it
# into a torch tensor with the precision requested on the command line.
regulationMatrix = readLTMGnonsparse(args.LTMGDir, ltmgFile)
regulationMatrix = torch.from_numpy(regulationMatrix)
if args.precisionModel == 'Double':
    regulationMatrix = regulationMatrix.type(torch.DoubleTensor)
elif args.precisionModel == 'Float':
    regulationMatrix = regulationMatrix.type(torch.FloatTensor)

# Original
# Build the requested model; input dimensionality is the number of features
# (genes) in the single-cell expression matrix.
if args.model == 'VAE':
    # model = VAE(dim=scData.features.shape[1]).to(device)
    model = VAE2d(dim=scData.features.shape[1]).to(device)
elif args.model == 'AE':
    model = AE(dim=scData.features.shape[1]).to(device)
if args.precisionModel == 'Double':
    model = model.double()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Benchmark: ground-truth cell-type labels, first column of the benchmark CSV.
bench_pd = pd.read_csv(args.benchmark, index_col=0)
# t1=pd.read_csv('/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv',index_col=0)
bench_celltype = bench_pd.iloc[:, 0].to_numpy()


# whether to output debuginfo in running time and memory consumption
def debuginfoStr(info):
    # Print elapsed wall-clock time plus the caller-supplied message, then
    # sample peak RSS. NOTE(review): the fragment is truncated here — `mem`
    # is presumably printed/logged on lines beyond this chunk; confirm against
    # the full file.
    if args.debuginfo:
        print('---' + str(datetime.timedelta(seconds=int(time.time() - start_time))) + '---' + info)
        mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
# --- Image autoencoder training script (fragment) ---
# `preprocess`, `ImageDataset`, `AE`, `np`, `nn`, `DataLoader` come from
# imports earlier in the full file.

# Free cached GPU memory and make cuDNN deterministic (disables autotuner).
torch.cuda.empty_cache()
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Training hyper-parameters.
BATCH = 64
LR = 2e-5
EPOCH = 50

if __name__ == "__main__":
    # sys.argv[1]: path to the .npy file holding the raw training images.
    train_x = np.load(sys.argv[1])
    train_x = preprocess(train_x)
    train_dataset = ImageDataset(train_x)
    model = AE().cuda()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH, shuffle=True)
    # Standard reconstruction-loss training loop: the AE returns
    # (latents, reconstruction) and is trained to reproduce its input.
    for epoch in range(EPOCH):
        model.train()
        for data in train_dataloader:
            x = data.cuda()
            latents, reconst_x = model(x)
            loss = criterion(reconst_x, x)
            optimizer.zero_grad()
            loss.backward()
            # NOTE(review): chunk is truncated here — `optimizer.step()` is
            # presumably on the next line of the full file; verify.
# --- Second image-autoencoder training script (fragment) ---
from model import AE
import sys

# Command-line arguments: input .npy of training images, output model path.
npy_path = sys.argv[1]
model_path = sys.argv[2]

trainX = np.load(npy_path)
# preprocess returns two variants; only the first is used for training here.
trainX_preprocessed, trainX_preprocessed2 = preprocess(trainX)
img_dataset = Image_Dataset2(trainX_preprocessed)

# Fix all RNG seeds for reproducibility (project helper).
same_seeds(0)

model = AE().cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5, weight_decay=1e-5)
#optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-5, momentum=0.9)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')  # adjust lr

model.train()
n_epoch = 300

# Prepare dataloader, model, loss criterion and optimizer.
img_dataloader = DataLoader(img_dataset, batch_size=64, shuffle=True)

# Main training loop.
# NOTE(review): the loop body is truncated at this chunk boundary — the
# forward/backward pass continues on lines beyond this view.
for epoch in range(n_epoch):
    for data in img_dataloader:
        img = data
# --- Autoencoder training setup: output dirs, logging, model, optimizer ---

# Checkpoint directory
save_dir = './save'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Log directory
log_dir = './log'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
# NOTE(review): this handle is never closed in the visible code — presumably
# closed after training; consider a `with` block in the full file.
log_file = open(os.path.join(log_dir, 'loss.txt'), 'w')

# Define model's training parameters
lr = 0.0005
num_epochs = 5
ae = AE()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(ae.parameters(), lr=lr, weight_decay=1e-5)


# Validation Loss
def eval_loss():
    # Average reconstruction MSE over one pass of the loader.
    # NOTE(review): despite the name, this iterates `train_loader`, and the
    # loss compares `dec` against the input `x` (ignoring `label`) — confirm
    # whether a separate validation loader was intended.
    val_loss = []
    for batch_id, (x, label) in enumerate(train_loader):
        enc, dec = ae(x)
        loss = criterion(dec, x).item()
        val_loss.append(loss)
    avg_loss = sum(val_loss) / len(val_loss)
    return (avg_loss)


# Train the model
# NOTE(review): loop body lies beyond this chunk boundary.
for epoch in range(num_epochs):
class BiAAE(object):
    """Bidirectional adversarial autoencoder for cross-lingual word embeddings.

    Two autoencoders (``X_AE``, ``Y_AE``) map source/target embeddings into a
    shared latent space; two discriminators (``D_X``, ``D_Y``) are trained
    adversarially to make decoded "fake" embeddings indistinguishable from
    real ones.  The generator objective combines adversarial loss,
    monolingual reconstruction (cosine) loss and a cross-lingual latent
    alignment (cosine) loss, weighted by ``params.adv_weight``,
    ``params.mono_weight`` and ``params.cross_weight``.
    """

    def __init__(self, params):
        self.params = params
        # Output directories for checkpoints, marker files and plots.
        self.tune_dir = "{}/{}-{}/{}".format(params.exp_id, params.src_lang,
                                             params.tgt_lang,
                                             params.norm_embeddings)
        self.tune_best_dir = "{}/best".format(self.tune_dir)
        self.X_AE = AE(params)
        self.Y_AE = AE(params)
        self.D_X = Discriminator(input_size=params.d_input_size,
                                 hidden_size=params.d_hidden_size,
                                 output_size=params.d_output_size)
        self.D_Y = Discriminator(input_size=params.d_input_size,
                                 hidden_size=params.d_hidden_size,
                                 output_size=params.d_output_size)
        self.nets = [self.X_AE, self.Y_AE, self.D_X, self.D_Y]
        self.loss_fn = torch.nn.BCELoss()
        self.loss_fn2 = torch.nn.CosineSimilarity(dim=1, eps=1e-6)

    def weights_init(self, m):
        """Orthogonal initialization for Linear layers."""
        if isinstance(m, torch.nn.Linear):
            # In-place underscore variants: the bare `orthogonal`/`constant`
            # aliases are deprecated (removed in recent torch); this also
            # matches the style used by CycleBWE elsewhere in this file.
            torch.nn.init.orthogonal_(m.weight)
            if m.bias is not None:
                torch.nn.init.constant_(m.bias, 0.01)

    def weights_init2(self, m):
        """Xavier-normal initialization for Linear layers."""
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.xavier_normal_(m.weight)
            if m.bias is not None:
                torch.nn.init.constant_(m.bias, 0.01)

    def weights_init3(self, m):
        """Identity-matrix initialization for Linear layers."""
        if isinstance(m, torch.nn.Linear):
            m.weight.data.copy_(
                torch.diag(torch.ones(self.params.g_input_size)))

    def freeze(self, m):
        """Stop gradients for every parameter of module *m*."""
        for p in m.parameters():
            p.requires_grad = False

    def defreeze(self, m):
        """Re-enable gradients for every parameter of module *m*."""
        for p in m.parameters():
            p.requires_grad = True

    def init_state(self, seed=-1):
        """Move networks/losses to GPU (if available) and initialize weights."""
        if torch.cuda.is_available():
            # Move the network and the optimizer to the GPU
            for net in self.nets:
                net.cuda()
            self.loss_fn = self.loss_fn.cuda()
            self.loss_fn2 = self.loss_fn2.cuda()
        print('Init3 the model...')
        self.X_AE.apply(self.weights_init)   # generator init (orthogonal)
        self.Y_AE.apply(self.weights_init)   # generator init (orthogonal)
        self.D_X.apply(self.weights_init2)   # discriminator init (xavier)
        # print(self.D_X.map1.weight)
        self.D_Y.apply(self.weights_init2)

    def train(self, src_dico, tgt_dico, src_emb, tgt_emb, seed):
        """Run adversarial training.

        Args:
            src_dico / tgt_dico: (id2word, word2id, ...) dictionaries; only
                index 1 (word2id) is used here.
            src_emb / tgt_emb: embedding tensors for source / target language.
            seed: random seed identifier, used only in output file names.

        Saves best checkpoints (by the `dist_mean_cosine` criterion) and
        diagnostic plots under ``self.tune_dir``.
        """
        # Load data
        if not os.path.exists(self.params.data_dir):
            print("Data path doesn't exists: %s" % self.params.data_dir)
        if not os.path.exists(self.tune_dir):
            os.makedirs(self.tune_dir)
        if not os.path.exists(self.tune_best_dir):
            os.makedirs(self.tune_best_dir)
        src_word2id = src_dico[1]
        tgt_word2id = tgt_dico[1]
        en = src_emb
        it = tgt_emb
        # eval = Evaluator(self.params, en, it, torch.cuda.is_available())

        # Plain SGD for both generators (only trainable params) and both
        # discriminators.
        AE_optimizer = optim.SGD(filter(
            lambda p: p.requires_grad,
            list(self.X_AE.parameters()) + list(self.Y_AE.parameters())),
            lr=self.params.g_learning_rate)
        D_optimizer = optim.SGD(list(self.D_X.parameters()) +
                                list(self.D_Y.parameters()),
                                lr=self.params.d_learning_rate)

        # Per-epoch history, used for the diagnostic plots below.
        D_A_acc_epochs = []
        D_B_acc_epochs = []
        D_A_loss_epochs = []
        D_B_loss_epochs = []
        d_loss_epochs = []
        G_AB_loss_epochs = []
        G_BA_loss_epochs = []
        G_AB_recon_epochs = []
        G_BA_recon_epochs = []
        g_loss_epochs = []
        L_Z_loss_epoches = []
        acc_epochs = []
        criterion_epochs = []
        best_valid_metric = -100
        try:
            for epoch in range(self.params.num_epochs):
                D_A_losses = []
                D_B_losses = []
                G_AB_losses = []
                G_AB_recon = []
                G_BA_losses = []
                G_adv_losses = []
                G_BA_recon = []
                L_Z_losses = []
                d_losses = []
                g_losses = []
                hit_A = 0
                hit_B = 0
                total = 0
                start_time = timer()
                # lowest_loss = 1e5

                # Smoothed discriminator targets: first half "real",
                # second half "fake"; generator targets are all "real".
                label_D = to_variable(
                    torch.FloatTensor(2 * self.params.mini_batch_size).zero_())
                label_D[:self.params.mini_batch_size] = 1 - self.params.smoothing
                label_D[self.params.mini_batch_size:] = self.params.smoothing
                label_G = to_variable(
                    torch.FloatTensor(self.params.mini_batch_size).zero_())
                label_G = label_G + 1 - self.params.smoothing

                for mini_batch in range(
                        0, self.params.iters_in_epoch // self.params.mini_batch_size):
                    # ---- Discriminator steps ----
                    for d_index in range(self.params.d_steps):
                        D_optimizer.zero_grad()  # Reset the gradients
                        self.D_X.train()
                        self.D_Y.train()
                        view_X, view_Y = self.get_batch_data_fast(en, it)

                        # Discriminator X: real X vs X decoded from Y's latent.
                        Y_Z = self.Y_AE.encode(view_Y).detach()
                        fake_X = self.X_AE.decode(Y_Z).detach()
                        d_input = torch.cat([view_X, fake_X], 0)  # renamed: don't shadow builtin `input`
                        pred_A = self.D_X(d_input)
                        D_A_loss = self.loss_fn(pred_A, label_D)

                        # Discriminator Y: real Y vs Y decoded from X's latent.
                        X_Z = self.X_AE.encode(view_X).detach()
                        fake_Y = self.Y_AE.decode(X_Z).detach()
                        d_input = torch.cat([view_Y, fake_Y], 0)
                        pred_B = self.D_Y(d_input)
                        D_B_loss = self.loss_fn(pred_B, label_D)

                        D_loss = D_A_loss + self.params.gate * D_B_loss
                        D_loss.backward()  # compute/store gradients, but don't change params
                        d_losses.append(to_numpy(D_loss.data))
                        D_A_losses.append(to_numpy(D_A_loss.data))
                        D_B_losses.append(to_numpy(D_B_loss.data))

                        # Discriminator accuracy bookkeeping (>= .5 on real,
                        # < .5 on fake counts as a hit).
                        discriminator_decision_A = to_numpy(pred_A.data)
                        hit_A += np.sum(
                            discriminator_decision_A[:self.params.mini_batch_size] >= 0.5)
                        hit_A += np.sum(
                            discriminator_decision_A[self.params.mini_batch_size:] < 0.5)
                        discriminator_decision_B = to_numpy(pred_B.data)
                        hit_B += np.sum(
                            discriminator_decision_B[:self.params.mini_batch_size] >= 0.5)
                        hit_B += np.sum(
                            discriminator_decision_B[self.params.mini_batch_size:] < 0.5)

                        D_optimizer.step()  # Only optimizes D's parameters; changes based on stored gradients from backward()

                        # Clip weights
                        # _clip(self.D_X, self.params.clip_value)
                        # _clip(self.D_Y, self.params.clip_value)

                        sys.stdout.write(
                            "[%d/%d] :: Discriminator Loss: %.3f \r" %
                            (mini_batch,
                             self.params.iters_in_epoch // self.params.mini_batch_size,
                             float(np.mean(d_losses))))  # np.asscalar removed in NumPy>=1.23
                        sys.stdout.flush()
                    total += 2 * self.params.mini_batch_size * self.params.d_steps

                    # ---- Generator steps ----
                    for g_index in range(self.params.g_steps):
                        # 2. Train G on D's response (but DO NOT train D on these labels)
                        AE_optimizer.zero_grad()
                        self.D_X.eval()
                        self.D_Y.eval()
                        view_X, view_Y = self.get_batch_data_fast(en, it)

                        # Generator X_AE: adversarial loss + reconstruction.
                        X_Z = self.X_AE.encode(view_X)
                        X_recon = self.X_AE.decode(X_Z)
                        Y_fake = self.Y_AE.decode(X_Z)
                        pred_Y = self.D_Y(Y_fake)
                        L_adv_X = self.loss_fn(pred_Y, label_G)
                        L_recon_X = 1.0 - torch.mean(
                            self.loss_fn2(view_X, X_recon))

                        # Generator Y_AE: adversarial loss + reconstruction.
                        Y_Z = self.Y_AE.encode(view_Y)
                        Y_recon = self.Y_AE.decode(Y_Z)
                        X_fake = self.X_AE.decode(Y_Z)
                        pred_X = self.D_X(X_fake)
                        L_adv_Y = self.loss_fn(pred_X, label_G)
                        L_recon_Y = 1.0 - torch.mean(
                            self.loss_fn2(view_Y, Y_recon))

                        # Cross-lingual latent alignment loss.
                        L_Z = 1.0 - torch.mean(self.loss_fn2(X_Z, Y_Z))

                        G_loss = self.params.adv_weight * (self.params.gate * L_adv_X + L_adv_Y) + \
                            self.params.mono_weight * (L_recon_X + L_recon_Y) + \
                            self.params.cross_weight * L_Z
                        G_loss.backward()
                        g_losses.append(to_numpy(G_loss.data))
                        G_AB_losses.append(to_numpy(L_adv_X.data))
                        G_BA_losses.append(to_numpy(L_adv_Y.data))
                        G_adv_losses.append(
                            to_numpy(L_adv_Y.data + L_adv_X.data))
                        G_AB_recon.append(to_numpy(L_recon_X.data))
                        G_BA_recon.append(to_numpy(L_recon_Y.data))
                        L_Z_losses.append(to_numpy(L_Z.data))
                        AE_optimizer.step()  # Only optimizes G's parameters
                        sys.stdout.write(
                            "[%d/%d] :: Generator Loss: %.3f \r" %
                            (mini_batch,
                             self.params.iters_in_epoch // self.params.mini_batch_size,
                             float(np.mean(g_losses))))
                        sys.stdout.flush()

                # ---- End of epoch: aggregate statistics ----
                D_A_acc_epochs.append(hit_A / total)
                D_B_acc_epochs.append(hit_B / total)
                G_AB_loss_epochs.append(float(np.mean(G_AB_losses)))
                G_BA_loss_epochs.append(float(np.mean(G_BA_losses)))
                D_A_loss_epochs.append(float(np.mean(D_A_losses)))
                D_B_loss_epochs.append(float(np.mean(D_B_losses)))
                G_AB_recon_epochs.append(float(np.mean(G_AB_recon)))
                G_BA_recon_epochs.append(float(np.mean(G_BA_recon)))
                L_Z_loss_epoches.append(float(np.mean(L_Z_losses)))
                d_loss_epochs.append(float(np.mean(d_losses)))
                g_loss_epochs.append(float(np.mean(g_losses)))
                print(
                    "Epoch {} : Discriminator Loss: {:.3f}, Discriminator Accuracy: {:.3f}, Generator Loss: {:.3f}, Time elapsed {:.2f} mins"
                    .format(epoch, float(np.mean(d_losses)),
                            0.5 * (hit_A + hit_B) / total,
                            float(np.mean(g_losses)),
                            (timer() - start_time) / 60))

                # ---- Periodic evaluation / checkpointing ----
                if (epoch + 1) % self.params.print_every == 0:
                    # No need for discriminator weights
                    X_Z = self.X_AE.encode(Variable(en)).data
                    Y_Z = self.Y_AE.encode(Variable(it)).data
                    mstart_time = timer()
                    for method in [self.params.eval_method]:
                        results = get_word_translation_accuracy(
                            self.params.src_lang, src_word2id, X_Z,
                            self.params.tgt_lang, tgt_word2id, Y_Z,
                            method=method,
                            dico_eval=self.params.eval_file)
                        acc1 = results[0][1]
                        print('{} takes {:.2f}s'.format(method,
                                                        timer() - mstart_time))
                        print('Method:{} score:{:.4f}'.format(method, acc1))
                    # Unsupervised model-selection criterion.
                    csls, size = dist_mean_cosine(self.params, X_Z, Y_Z)
                    criterion = size
                    if criterion > best_valid_metric:
                        print("New criterion value: {}".format(criterion))
                        best_valid_metric = criterion
                        # Empty marker file recording the best epoch/accuracy.
                        fp = open(
                            self.tune_best_dir +
                            "/seed_{}_dico_{}_gate_{}_epoch_{}_acc_{:.3f}.tmp".
                            format(seed, self.params.dico_build,
                                   self.params.gate, epoch, acc1), 'w')
                        fp.close()
                        torch.save(
                            self.X_AE.state_dict(),
                            self.tune_best_dir +
                            '/seed_{}_dico_{}_gate_{}_best_X.t7'.format(
                                seed, self.params.dico_build, self.params.gate))
                        torch.save(
                            self.Y_AE.state_dict(),
                            self.tune_best_dir +
                            '/seed_{}_dico_{}_gate_{}_best_Y.t7'.format(
                                seed, self.params.dico_build, self.params.gate))
                        torch.save(
                            self.D_X.state_dict(),
                            self.tune_best_dir +
                            '/seed_{}_dico_{}_gate_{}_best_Dx.t7'.format(
                                seed, self.params.dico_build, self.params.gate))
                        torch.save(
                            self.D_Y.state_dict(),
                            self.tune_best_dir +
                            '/seed_{}_dico_{}_gate_{}__best_Dy.t7'.format(
                                seed, self.params.dico_build, self.params.gate))
                    # Saving generator weights
                    fp = open(
                        self.tune_dir +
                        "/seed_{}_gate_{}_epoch_{}_acc_{:.3f}.tmp".format(
                            seed, self.params.gate, epoch, acc1), 'w')
                    fp.close()
                    acc_epochs.append(acc1)
                    criterion_epochs.append(criterion)
                    # Best criterion seen so far and its checkpoint index.
                    criterion_fb, epoch_fb = max([
                        (score, index)
                        for index, score in enumerate(criterion_epochs)
                    ])
                    fp = open(
                        self.tune_best_dir +
                        "/seed_{}_dico_{}_gate_{}_epoch_{}_Acc_{:.3f}_{:.4f}.cslsfb".
                        format(seed, self.params.gate, self.params.dico_build,
                               epoch_fb, acc_epochs[epoch_fb], criterion_fb),
                        'w')
                    fp.close()

                    # Save the plot for discriminator accuracy and generator loss
                    # (refreshed at every evaluation checkpoint).
                    fig = plt.figure()
                    plt.plot(range(0, len(D_A_acc_epochs)), D_A_acc_epochs,
                             color='b', label='D_A')
                    plt.plot(range(0, len(D_B_acc_epochs)), D_B_acc_epochs,
                             color='r', label='D_B')
                    plt.ylabel('D_accuracy')
                    plt.xlabel('epochs')
                    plt.legend()
                    fig.savefig(self.tune_dir + '/seed_{}_D_acc.png'.format(seed))

                    fig = plt.figure()
                    plt.plot(range(0, len(D_A_loss_epochs)), D_A_loss_epochs,
                             color='b', label='D_A')
                    plt.plot(range(0, len(D_B_loss_epochs)), D_B_loss_epochs,
                             color='r', label='D_B')
                    plt.ylabel('D_losses')
                    plt.xlabel('epochs')
                    plt.legend()
                    fig.savefig(self.tune_dir + '/seed_{}_D_loss.png'.format(seed))

                    fig = plt.figure()
                    plt.plot(range(0, len(G_AB_loss_epochs)), G_AB_loss_epochs,
                             color='b', label='G_AB')
                    plt.plot(range(0, len(G_BA_loss_epochs)), G_BA_loss_epochs,
                             color='r', label='G_BA')
                    plt.ylabel('G_losses')
                    plt.xlabel('epochs')
                    plt.legend()
                    fig.savefig(self.tune_dir + '/seed_{}_G_loss.png'.format(seed))

                    fig = plt.figure()
                    plt.plot(range(0, len(G_AB_recon_epochs)), G_AB_recon_epochs,
                             color='b', label='G_AB')
                    plt.plot(range(0, len(G_BA_recon_epochs)), G_BA_recon_epochs,
                             color='r', label='G_BA')
                    plt.ylabel('G_recon_loss')
                    plt.xlabel('epochs')
                    plt.legend()
                    fig.savefig(self.tune_dir + '/seed_{}_G_Recon.png'.format(seed))

                    # fig = plt.figure()
                    # plt.plot(range(0, len(L_Z_loss_epoches)), L_Z_loss_epoches, color='b', label='L_Z')
                    # plt.ylabel('L_Z_loss')
                    # plt.xlabel('epochs')
                    # plt.legend()
                    # fig.savefig(tune_dir + '/seed_{}_L_Z.png'.format(seed))

                    fig = plt.figure()
                    plt.plot(range(0, len(acc_epochs)), acc_epochs,
                             color='b', label='trans_acc1')
                    plt.ylabel('trans_acc')
                    plt.xlabel('epochs')
                    plt.legend()
                    fig.savefig(self.tune_dir + '/seed_{}_trans_acc.png'.format(seed))

                    # fig = plt.figure()
                    # plt.plot(range(0, len(csls_epochs)), csls_epochs, color='b', label='csls')
                    # plt.ylabel('csls')
                    # plt.xlabel('epochs')
                    # plt.legend()
                    # fig.savefig(self.tune_dir + '/seed_{}_csls.png'.format(seed))

                    fig = plt.figure()
                    plt.plot(range(0, len(g_loss_epochs)), g_loss_epochs,
                             color='b', label='G_loss')
                    plt.ylabel('g_loss')
                    plt.xlabel('epochs')
                    plt.legend()
                    fig.savefig(self.tune_dir + '/seed_{}_g_loss.png'.format(seed))

                    fig = plt.figure()
                    plt.plot(range(0, len(d_loss_epochs)), d_loss_epochs,
                             color='b', label='D_loss')  # was mislabeled 'csls' (copy-paste)
                    plt.ylabel('D_loss')
                    plt.xlabel('epochs')
                    plt.legend()
                    fig.savefig(self.tune_dir + '/seed_{}_d_loss.png'.format(seed))
                    plt.close('all')
        except KeyboardInterrupt:
            # Allow Ctrl-C to stop training while still keeping the weights.
            print("Interrupted.. saving model !!!")
            torch.save(self.X_AE.state_dict(),
                       self.tune_dir + '/X_AE_model_interrupt.t7')
            torch.save(self.Y_AE.state_dict(),
                       self.tune_dir + '/Y_AE_model_interrupt.t7')
            torch.save(self.D_X.state_dict(),
                       self.tune_dir + '/D_X_model_interrupt.t7')
            torch.save(self.D_Y.state_dict(),
                       self.tune_dir + '/D_y_model_interrupt.t7')
            exit()
        return

    def get_batch_data_fast(self, emb_en, emb_it):
        """Sample one random mini-batch of embeddings for each language.

        Rows are drawn uniformly (with replacement) from the
        ``most_frequent_sampling_size`` most frequent words.  Requires CUDA
        (indices are moved to GPU before indexing).
        """
        params = self.params
        random_en_indices = torch.LongTensor(params.mini_batch_size).random_(
            params.most_frequent_sampling_size)
        random_it_indices = torch.LongTensor(params.mini_batch_size).random_(
            params.most_frequent_sampling_size)
        en_batch = to_variable(emb_en)[random_en_indices.cuda()]
        it_batch = to_variable(emb_it)[random_it_indices.cuda()]
        return en_batch, it_batch
def main(argv):
    """Entry point: optionally train, then optionally test, the denoising AE.

    Args:
        argv: raw command-line arguments, parsed by `arguments_parsing` into
            the TRAIN/TEST flags and the dataset/model hyper-parameters.

    Side effects: writes the dataset table CSV, the trained model `.pth`
    file, and (in TEST mode) the denoised result images.
    """
    (TRAIN, NOISE_TYPES, IMAGE_SIZE, FRAME_SIZE, OVERLAY_SIZE,
     LATENT_CLEAN_SIZE, BATCH_SIZE, EPOCHS, TEST) = arguments_parsing(argv)

    # Model file name is derived from the noise types; compute it once so the
    # TRAIN and TEST branches can never disagree (was duplicated before).
    path_to_model = './model' + '_{}'.format(
        '_'.join([str(elem) for elem in NOISE_TYPES])) + '.pth'

    if TRAIN:
        print('model training with parameters:\n'+
              'noise types = {}\n'.format(NOISE_TYPES)+
              'image size = {}\n'.format(IMAGE_SIZE)+
              'frame size = {}\n'.format(FRAME_SIZE)+
              'overlay size = {}\n'.format(OVERLAY_SIZE)+
              'latent clean size = {}\n'.format(LATENT_CLEAN_SIZE)+
              'batch size = {}\n'.format(BATCH_SIZE)+
              'number of epochs = {}\n'.format(EPOCHS))

        # dataset table creating
        make_dataset_table(PATH_TO_DATA, NOISE_TYPES, PATH_TO_DATASET_TABLE)
        train_test_split(PATH_TO_DATASET_TABLE, test_size=0.2)

        # dataset and dataloader creating
        torch.manual_seed(0)
        # Three augmented views of the same data: horizontal flip, vertical
        # flip, and color jitter.
        transforms = [Compose([RandomHorizontalFlip(p=1.0), ToTensor()]),
                      Compose([RandomVerticalFlip(p=1.0), ToTensor()]),
                      Compose([ColorJitter(brightness=(0.9, 2.0),
                                           contrast=(0.9, 2.0)),
                               ToTensor()])]
        # Read the table once (was re-read for every transform) and build one
        # flat ConcatDataset (was recursively nested, seeded with a bare []).
        dataset_table = pd.read_csv(PATH_TO_DATASET_TABLE)
        train_dataset = ConcatDataset([
            DenoisingDataset(dataset=dataset_table,
                             image_size=IMAGE_SIZE,
                             frame_size=FRAME_SIZE,
                             overlay_size=OVERLAY_SIZE,
                             phase='train',
                             transform=transform)
            for transform in transforms
        ])
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,  # can be set to True only for train loader
                                  num_workers=0)

        # model training
        model = AE(1, LATENT_CLEAN_SIZE)
        loss = SSIMLoss()
        latent_loss = MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1.0e-3)
        model = train_model(model, train_loader, loss, latent_loss, optimizer,
                            epochs=EPOCHS, device=DEVICE)

        # model saving
        torch.save(model, path_to_model)

    if TEST:
        # model loading
        print('{} testing...\n'.format(os.path.basename(path_to_model)))
        # NOTE: torch.load unpickles a full model object — only load files you
        # created yourself.  map_location keeps this working on CPU-only hosts.
        model = torch.load(path_to_model, map_location=DEVICE)
        dataset = pd.read_csv(PATH_TO_DATASET_TABLE)
        test_dataset = dataset[dataset['phase'] == 'test']

        # model testing and results saving
        loss = SSIMLoss()
        latent_loss = MSELoss()
        print('{} evaluation on test images'.format(
            os.path.basename(path_to_model)))
        test_evaluation(model, test_dataset, loss, latent_loss, device=DEVICE)
        print()

        path_to_results = PATH_TO_RESULTS + '_{}'.format(
            '_'.join([str(elem) for elem in NOISE_TYPES]))
        os.makedirs(path_to_results, exist_ok=True)  # race-free vs. exists()+makedirs()
        print('{} running and results saving'.format(
            os.path.basename(path_to_model)))
        test_model(model, test_dataset, path_to_results)
        print('process completed: OK')
# NOTE(review): the lines below are the tail of a test/eval function whose
# `def` line is outside this chunk (it iterates a loader of (data, label)
# pairs of flattened 28x28 images and accumulates `test_loss`/`total`).
        data = data.to(device).view(-1, 28 * 28)
        label = label.to(device).view(-1, 28 * 28)
        _, recons_x = model(data)
        loss = criterion(recons_x, label)
        test_loss += loss.item()
        total += label.size(0)
    # Mean reconstruction loss per sample over the whole test set.
    avg_loss = test_loss / total
    print('===> Test Average loss: {:.7f}\n'.format(avg_loss))
    return avg_loss


# --- Script: pretrain the autoencoder and save the checkpoint ---
ae_model = AE()
ae_model.to(device)
optimizer = optim.Adam(ae_model.parameters(), lr=0.001, betas=(0.9, 0.999))
criterion = nn.MSELoss()
batch_size = 256
# Project helper returning (train_loader, test_loader) from a saved tensor file.
train_loader, test_loader = get_customDataLoader(
    './data_for_ae/data_for_autoencoder.pth', batch_size=batch_size)
trainer(ae_model, train_loader, test_loader, optimizer, criterion,
        save_path='./pretrained_models/autoencoder_pretrained_1.pth')
#load_model(ae_model, './autoencoder_pretrained.pth')
#test(ae_model, test_loader)
class CycleBWE(object): def __init__(self, params): self.params = params self.tune_dir = "{}/{}-{}/{}".format(params.exp_id, params.src_lang, params.tgt_lang, params.norm_embeddings) self.tune_best_dir = "{}/best".format(self.tune_dir) self.tune_export_dir = "{}/export".format(self.tune_dir) if self.params.eval_file == 'wiki': self.eval_file = '../data/bilingual_dicts/{}-{}.5000-6500.txt'.format( self.params.src_lang, self.params.tgt_lang) self.eval_file2 = '../data/bilingual_dicts/{}-{}.5000-6500.txt'.format( self.params.tgt_lang, self.params.src_lang) elif self.params.eval_file == 'wacky': self.eval_file = '../data/bilingual_dicts/{}-{}.test.txt'.format( self.params.src_lang, self.params.tgt_lang) self.eval_file2 = '../data/bilingual_dicts/{}-{}.test.txt'.format( self.params.tgt_lang, self.params.src_lang) else: print('Invalid eval file!') # self.seed = random.randint(0, 1000) # self.seed = 41 # self.initialize_exp(self.seed) self.X_AE = AE(params) self.Y_AE = AE(params) self.D_X = Discriminator(input_size=params.d_input_size, hidden_size=params.d_hidden_size, output_size=params.d_output_size) self.D_Y = Discriminator(input_size=params.d_input_size, hidden_size=params.d_hidden_size, output_size=params.d_output_size) self.nets = [self.X_AE, self.Y_AE, self.D_X, self.D_Y] self.loss_fn = torch.nn.BCELoss() self.loss_fn2 = torch.nn.CosineSimilarity(dim=1, eps=1e-6) def weights_init(self, m): # 正交初始化 if isinstance(m, torch.nn.Linear): torch.nn.init.orthogonal(m.weight) if m.bias is not None: torch.nn.init.constant(m.bias, 0.01) def weights_init2(self, m): # xavier_normal 初始化 if isinstance(m, torch.nn.Linear): torch.nn.init.xavier_normal_(m.weight) if m.bias is not None: torch.nn.init.constant_(m.bias, 0.01) def weights_init3(self, m): # 单位阵初始化 if isinstance(m, torch.nn.Linear): m.weight.data.copy_( torch.diag(torch.ones(self.params.g_input_size))) def init_state(self, state=1): if torch.cuda.is_available(): # Move the network and the optimizer to the GPU for net in 
self.nets: net.cuda() self.loss_fn = self.loss_fn.cuda() self.loss_fn2 = self.loss_fn2.cuda() if self.params.init == 'eye': self.X_AE.apply(self.weights_init3) # 可更改G初始化方式 self.Y_AE.apply(self.weights_init3) # 可更改G初始化方式 elif self.params.init == 'orth': self.X_AE.apply(self.weights_init) # 可更改G初始化方式 self.Y_AE.apply(self.weights_init) else: print('Invalid init func!') #self.D_X.apply(self.weights_init2) #self.D_Y.apply(self.weights_init2) def orthogonalize(self, W): params = self.params W.copy_((1 + params.beta) * W - params.beta * W.mm(W.transpose(0, 1).mm(W))) def train(self, src_dico, tgt_dico, src_emb, tgt_emb, seed): params = self.params # Load data if not os.path.exists(params.data_dir): print("Data path doesn't exists: %s" % params.data_dir) if not os.path.exists(self.tune_dir): os.makedirs(self.tune_dir) if not os.path.exists(self.tune_best_dir): os.makedirs(self.tune_best_dir) if not os.path.exists(self.tune_export_dir): os.makedirs(self.tune_export_dir) src_word2id = src_dico[1] tgt_word2id = tgt_dico[1] en = src_emb it = tgt_emb params = _get_eval_params(params) self.params = params eval = Evaluator(params, en, it, torch.cuda.is_available()) # for seed_index in range(params.num_random_seeds): AE_optimizer = optim.SGD(filter( lambda p: p.requires_grad, list(self.X_AE.parameters()) + list(self.Y_AE.parameters())), lr=params.g_learning_rate) # AE_optimizer = optim.SGD(G_params, lr=0.1, momentum=0.9) # AE_optimizer = optim.Adam(G_params, lr=params.g_learning_rate, betas=(0.9, 0.9)) # AE_optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, list(self.X_AE.parameters()) + list(self.Y_AE.parameters())),lr=params.g_learning_rate,alpha=0.9) D_optimizer = optim.SGD(list(self.D_X.parameters()) + list(self.D_Y.parameters()), lr=params.d_learning_rate) # D_optimizer = optim.Adam(D_params, lr=params.d_learning_rate, betas=(0.5, 0.9)) # D_optimizer = optim.RMSprop(list(self.D_X.parameters()) + list(self.D_Y.parameters()), lr=params.d_learning_rate , alpha=0.9) # 
D_X=nn.DataParallel(D_X) # D_Y=nn.DataParallel(D_Y) # true_dict = get_true_dict(params.data_dir) D_A_acc_epochs = [] D_B_acc_epochs = [] D_A_loss_epochs = [] D_B_loss_epochs = [] G_AB_loss_epochs = [] G_BA_loss_epochs = [] G_AB_recon_epochs = [] G_BA_recon_epochs = [] L_Z_loss_epoches = [] acc1_epochs = [] acc2_epochs = [] csls_epochs = [] f_csls_epochs = [] b_csls_epochs = [] best_valid_metric = -100 # logs for plotting later log_file = open( "log_src_tgt.txt", "w") # Being overwritten in every loop, not really required log_file.write("epoch, dis_loss, dis_acc, g_loss\n") try: for epoch in range(self.params.num_epochs): D_A_losses = [] D_B_losses = [] G_AB_losses = [] G_AB_recon = [] G_BA_losses = [] G_adv_losses = [] G_BA_recon = [] L_Z_losses = [] d_losses = [] g_losses = [] hit_A = 0 hit_B = 0 total = 0 start_time = timer() # lowest_loss = 1e5 # label_D = to_variable(torch.FloatTensor(2 * params.mini_batch_size).zero_()) label_D = to_variable( torch.FloatTensor(2 * params.mini_batch_size).zero_()) label_D[:params.mini_batch_size] = 1 - params.smoothing label_D[params.mini_batch_size:] = params.smoothing label_G = to_variable( torch.FloatTensor(params.mini_batch_size).zero_()) label_G = label_G + 1 - params.smoothing for mini_batch in range( 0, params.iters_in_epoch // params.mini_batch_size): for d_index in range(params.d_steps): D_optimizer.zero_grad() # Reset the gradients self.D_X.train() self.D_Y.train() #print('D_X:', self.D_X.map1.weight.data) #print('D_Y:', self.D_Y.map1.weight.data) view_X, view_Y = self.get_batch_data_fast_new(en, it) # Discriminator X #print('View_Y',view_Y) fake_X = self.Y_AE.encode(view_Y).detach() #print('fakeX',fake_X) input = torch.cat([view_X, fake_X], 0) pred_A = self.D_X(input) #print('Pred_A',pred_A) D_A_loss = self.loss_fn(pred_A, label_D) # print(view_Y) # Discriminator Y # print('View_X',view_X) fake_Y = self.X_AE.encode(view_X).detach() # print('fakeY:',fake_Y) input = torch.cat([view_Y, fake_Y], 0) pred_B = 
self.D_Y(input) # print('Pred_B', pred_B) D_B_loss = self.loss_fn(pred_B, label_D) D_loss = (1.0) * D_A_loss + params.gate * D_B_loss D_loss.backward( ) # compute/store gradients, but don't change params d_losses.append(to_numpy(D_loss.data)) D_A_losses.append(to_numpy(D_A_loss.data)) D_B_losses.append(to_numpy(D_B_loss.data)) discriminator_decision_A = to_numpy(pred_A.data) hit_A += np.sum( discriminator_decision_A[:params.mini_batch_size] >= 0.5) hit_A += np.sum( discriminator_decision_A[params.mini_batch_size:] < 0.5) discriminator_decision_B = to_numpy(pred_B.data) hit_B += np.sum( discriminator_decision_B[:params.mini_batch_size] >= 0.5) hit_B += np.sum( discriminator_decision_B[params.mini_batch_size:] < 0.5) D_optimizer.step( ) # Only optimizes D's parameters; changes based on stored gradients from backward() # Clip weights _clip(self.D_X, params.clip_value) _clip(self.D_Y, params.clip_value) # print('D_loss',d_losses) sys.stdout.write( "[%d/%d] :: Discriminator Loss: %.3f \r" % (mini_batch, params.iters_in_epoch // params.mini_batch_size, np.asscalar(np.mean(d_losses)))) sys.stdout.flush() total += 2 * params.mini_batch_size * params.d_steps for g_index in range(params.g_steps): # 2. 
Train G on D's response (but DO NOT train D on these labels) AE_optimizer.zero_grad() self.D_X.eval() self.D_Y.eval() view_X, view_Y = self.get_batch_data_fast_new(en, it) # Generator X_AE ## adversarial loss Y_fake = self.X_AE.encode(view_X) # X_recon = self.X_AE.decode(X_Z) # Y_fake = self.Y_AE.encode(X_Z) pred_Y = self.D_Y(Y_fake) L_adv_X = self.loss_fn(pred_Y, label_G) X_Cycle = self.Y_AE.encode(Y_fake) L_Cycle_X = 1.0 - torch.mean( self.loss_fn2(view_X, X_Cycle)) # L_recon_X = 1.0 - torch.mean(self.loss_fn2(view_X, X_recon)) # L_G_AB = L_adv_X + params.recon_weight * L_recon_X # Generator Y_AE # adversarial loss X_fake = self.Y_AE.encode(view_Y) pred_X = self.D_X(X_fake) L_adv_Y = self.loss_fn(pred_X, label_G) ### Cycle Loss Y_Cycle = self.X_AE.encode(X_fake) L_Cycle_Y = 1.0 - torch.mean( self.loss_fn2(view_Y, Y_Cycle)) # L_recon_Y = 1.0 - torch.mean(self.loss_fn2(view_Y, Y_recon)) # L_G_BA = L_adv_Y + params.recon_weight * L_recon_Y # L_Z = 1.0 - torch.mean(self.loss_fn2(X_Z, Y_Z)) # G_loss = L_G_AB + L_G_BA + L_Z G_loss = params.adv_weight * ( params.gate * L_adv_X + (1.0) * L_adv_Y) + \ params.cycle_weight * (L_Cycle_X+L_Cycle_Y) G_loss.backward() g_losses.append(to_numpy(G_loss.data)) G_AB_losses.append(to_numpy(L_adv_X.data)) G_BA_losses.append(to_numpy(L_adv_Y.data)) G_adv_losses.append(to_numpy(L_adv_Y.data)) G_AB_recon.append(to_numpy(L_Cycle_X.data)) G_BA_recon.append(to_numpy(L_Cycle_Y.data)) AE_optimizer.step() # Only optimizes G's parameters self.orthogonalize(self.X_AE.map1.weight.data) self.orthogonalize(self.Y_AE.map1.weight.data) sys.stdout.write( "[%d/%d] :: Generator Loss: %.3f \r" % (mini_batch, params.iters_in_epoch // params.mini_batch_size, np.asscalar(np.mean(g_losses)))) sys.stdout.flush() '''for each epoch''' D_A_acc_epochs.append(hit_A / total) D_B_acc_epochs.append(hit_B / total) G_AB_loss_epochs.append(np.asscalar(np.mean(G_AB_losses))) G_BA_loss_epochs.append(np.asscalar(np.mean(G_BA_losses))) 
D_A_loss_epochs.append(np.asscalar(np.mean(D_A_losses))) D_B_loss_epochs.append(np.asscalar(np.mean(D_B_losses))) G_AB_recon_epochs.append(np.asscalar(np.mean(G_AB_recon))) G_BA_recon_epochs.append(np.asscalar(np.mean(G_BA_recon))) # L_Z_loss_epoches.append(np.asscalar(np.mean(L_Z_losses))) print( "Epoch {} : Discriminator Loss: {:.3f}, Discriminator Accuracy: {:.3f}, Generator Loss: {:.3f}, Time elapsed {:.2f} mins" .format(epoch, np.asscalar(np.mean(d_losses)), 0.5 * (hit_A + hit_B) / total, np.asscalar(np.mean(g_losses)), (timer() - start_time) / 60)) # lr decay # g_optim_state = AE_optimizer.state_dict() # old_lr = g_optim_state['param_groups'][0]['lr'] # g_optim_state['param_groups'][0]['lr'] = max(old_lr * params.lr_decay, params.lr_min) # AE_optimizer.load_state_dict(g_optim_state) # print("Changing the learning rate: {} -> {}".format(old_lr, g_optim_state['param_groups'][0]['lr'])) # d_optim_state = D_optimizer.state_dict() # d_optim_state['param_groups'][0]['lr'] = max( # d_optim_state['param_groups'][0]['lr'] * params.lr_decay, params.lr_min) # D_optimizer.load_state_dict(d_optim_state) # d_optim_state['param_groups'][0]['lr'] * params.lr_decay, params.lr_min) # D_optimizer.load_state_dict(d_optim_state) if (epoch + 1) % params.print_every == 0: # No need for discriminator weights # torch.save(d.state_dict(), 'discriminator_weights_en_es_{}.t7'.format(epoch)) # all_precisions = eval.get_all_precisions(G_AB(src_emb.weight).data) Vec_xy = self.X_AE.encode(Variable(en)) Vec_xyx = self.Y_AE.encode(Vec_xy) Vec_yx = self.Y_AE.encode(Variable(it)) Vec_yxy = self.X_AE.encode(Vec_yx) mstart_time = timer() # for method in ['csls_knn_10']: for method in [params.eval_method]: results = get_word_translation_accuracy( params.src_lang, src_word2id, Vec_xy.data, params.tgt_lang, tgt_word2id, it, method=method, dico_eval=self.eval_file, device=params.cuda_device) acc1 = results[0][1] results = get_word_translation_accuracy( params.tgt_lang, tgt_word2id, Vec_yx.data, 
params.src_lang, src_word2id, en, method=method, dico_eval=self.eval_file2, device=params.cuda_device) acc2 = results[0][1] print('{} takes {:.2f}s'.format( method, timer() - mstart_time)) print('Method:{} test_score:{:.4f}-{:.4f}'.format( method, acc1, acc2)) ''' # for method in ['csls_knn_10']: for method in [params.eval_method]: results = get_word_translation_accuracy( params.src_lang, src_word2id, Vec_xyx.data, params.src_lang, src_word2id, en, method=method, dico_eval='/data/dictionaries/{}-{}.wacky.dict'.format(params.src_lang,params.src_lang), device=params.cuda_device ) acc11 = results[0][1] # for method in ['csls_knn_10']: for method in [params.eval_method]: results = get_word_translation_accuracy( params.tgt_lang, tgt_word2id, Vec_yxy.data, params.tgt_lang, tgt_word2id, it, method=method, dico_eval='/data/dictionaries/{}-{}.wacky.dict'.format(params.tgt_lang,params.tgt_lang), device=params.cuda_device ) acc22 = results[0][1] print('Valid:{} score:{:.4f}-{:.4f}'.format(method, acc11, acc22)) avg_valid = (acc11+acc22)/2.0 # valid_x = torch.mean(self.loss_fn2(en, Vec_xyx.data)) # valid_y = torch.mean(self.loss_fn2(it, Vec_yxy.data)) # avg_valid = (valid_x+valid_y)/2.0 ''' # csls = 0 f_csls = eval.dist_mean_cosine(Vec_xy.data, it) b_csls = eval.dist_mean_cosine(Vec_yx.data, en) csls = (f_csls + b_csls) / 2.0 # csls = eval.calc_unsupervised_criterion(X_Z) if csls > best_valid_metric: print("New csls value: {}".format(csls)) best_valid_metric = csls fp = open( self.tune_dir + "/best/seed_{}_dico_{}_epoch_{}_acc_{:.3f}-{:.3f}.tmp" .format(seed, params.dico_build, epoch, acc1, acc2), 'w') fp.close() torch.save( self.X_AE.state_dict(), self.tune_dir + '/best/seed_{}_dico_{}_best_X.t7'.format( seed, params.dico_build)) torch.save( self.Y_AE.state_dict(), self.tune_dir + '/best/seed_{}_dico_{}_best_Y.t7'.format( seed, params.dico_build)) torch.save( self.D_X.state_dict(), self.tune_dir + '/best/seed_{}_dico_{}_best_Dx.t7'.format( seed, params.dico_build)) 
torch.save( self.D_Y.state_dict(), self.tune_dir + '/best/seed_{}_dico_{}_best_Dy.t7'.format( seed, params.dico_build)) # print(json.dumps(all_precisions)) # p_1 = all_precisions['validation']['adv']['without-ref']['nn'][1] # p_1 = all_precisions['validation']['adv']['without-ref']['csls'][1] # log_file.write(str(results) + "\n") # print('Method: nn score:{:.4f}'.format(acc)) # Saving generator weights # torch.save(X_AE.state_dict(), tune_dir+'/G_AB_seed_{}_mf_{}_lr_{}_p@1_{:.3f}.t7'.format(seed,params.most_frequent_sampling_size,params.g_learning_rate,acc)) # torch.save(Y_AE.state_dict(), tune_dir+'/G_BA_seed_{}_mf_{}_lr_{}_p@1_{:.3f}.t7'.format(seed,params.most_frequent_sampling_size,params.g_learning_rate,acc)) fp = open( self.tune_dir + "/seed_{}_epoch_{}_acc_{:.3f}-{:.3f}_valid_{:.4f}.tmp". format(seed, epoch, acc1, acc2, csls), 'w') fp.close() acc1_epochs.append(acc1) acc2_epochs.append(acc2) csls_epochs.append(csls) f_csls_epochs.append(f_csls) b_csls_epochs.append(b_csls) csls_fb, epoch_fb = max([ (score, index) for index, score in enumerate(csls_epochs) ]) fp = open( self.tune_dir + "/best/seed_{}_epoch_{}_{:.3f}_{:.3f}_{:.3f}.cslsfb".format( seed, epoch_fb, acc1_epochs[epoch_fb], acc2_epochs[epoch_fb], csls_fb), 'w') fp.close() csls_f, epoch_f = max([ (score, index) for index, score in enumerate(f_csls_epochs) ]) fp = open( self.tune_dir + "/best/seed_{}_epoch_{}_{:.3f}_{:.3f}_{:.3f}.cslsf".format( seed, epoch_f, acc1_epochs[epoch_f], acc2_epochs[epoch_f], csls_f), 'w') fp.close() csls_b, epoch_b = max([ (score, index) for index, score in enumerate(b_csls_epochs) ]) fp = open( self.tune_dir + "/best/seed_{}_epoch_{}_{:.3f}_{:.3f}_{:.3f}.cslsb".format( seed, epoch_b, acc1_epochs[epoch_b], acc2_epochs[epoch_b], csls_b), 'w') fp.close() ''' # Save the plot for discriminator accuracy and generator loss fig = plt.figure() plt.plot(range(0, len(D_A_acc_epochs)), D_A_acc_epochs, color='b', label='D_A') plt.plot(range(0, len(D_B_acc_epochs)), D_B_acc_epochs, 
color='r', label='D_B') plt.ylabel('D_accuracy') plt.xlabel('epochs') plt.legend() fig.savefig(self.tune_dir + '/seed_{}_D_acc.png'.format(seed)) fig = plt.figure() plt.plot(range(0, len(D_A_loss_epochs)), D_A_loss_epochs, color='b', label='D_A') plt.plot(range(0, len(D_B_loss_epochs)), D_B_loss_epochs, color='r', label='D_B') plt.ylabel('D_losses') plt.xlabel('epochs') plt.legend() fig.savefig(self.tune_dir + '/seed_{}_D_loss.png'.format(seed)) fig = plt.figure() plt.plot(range(0, len(G_AB_loss_epochs)), G_AB_loss_epochs, color='b', label='G_AB') plt.plot(range(0, len(G_BA_loss_epochs)), G_BA_loss_epochs, color='r', label='G_BA') plt.ylabel('G_losses') plt.xlabel('epochs') plt.legend() fig.savefig(self.tune_dir + '/seed_{}_G_loss.png'.format(seed)) fig = plt.figure() plt.plot(range(0, len(G_AB_recon_epochs)), G_AB_recon_epochs, color='b', label='G_AB') plt.plot(range(0, len(G_BA_recon_epochs)), G_BA_recon_epochs, color='r', label='G_BA') plt.ylabel('G_Cycle_loss') plt.xlabel('epochs') plt.legend() fig.savefig(self.tune_dir + '/seed_{}_G_Cycle.png'.format(seed)) # fig = plt.figure() # plt.plot(range(0, len(L_Z_loss_epoches)), L_Z_loss_epoches, color='b', label='L_Z') # plt.ylabel('L_Z_loss') # plt.xlabel('epochs') # plt.legend() # fig.savefig(tune_dir + '/seed_{}_stage_{}_L_Z.png'.format(seed,stage)) fig = plt.figure() plt.plot(range(0, len(acc1_epochs)), acc1_epochs, color='b', label='trans_acc1') plt.plot(range(0, len(acc2_epochs)), acc2_epochs, color='r', label='trans_acc2') plt.ylabel('trans_acc') plt.xlabel('epochs') plt.legend() fig.savefig(self.tune_dir + '/seed_{}_trans_acc.png'.format(seed)) fig = plt.figure() plt.plot(range(0, len(csls_epochs)), csls_epochs, color='b', label='csls') plt.plot(range(0, len(f_csls_epochs)), f_csls_epochs, color='r', label='csls_f') plt.plot(range(0, len(b_csls_epochs)), b_csls_epochs, color='g', label='csls_b') plt.ylabel('csls') plt.xlabel('epochs') plt.legend() fig.savefig(self.tune_dir + '/seed_{}_csls.png'.format(seed)) 
fig = plt.figure() plt.plot(range(0, len(g_losses)), g_losses, color='b', label='G_loss') plt.ylabel('g_loss') plt.xlabel('epochs') plt.legend() fig.savefig(self.tune_dir + '/seed_{}_g_loss.png'.format(seed)) fig = plt.figure() plt.plot(range(0, len(d_losses)), d_losses, color='b', label='csls') plt.ylabel('D_loss') plt.xlabel('epochs') plt.legend() fig.savefig(self.tune_dir + '/seed_{}_d_loss.png'.format(seed)) plt.close('all') ''' except KeyboardInterrupt: print("Interrupted.. saving model !!!") torch.save(self.X_AE.state_dict(), 'g_model_interrupt.t7') torch.save(self.D_X.state_dict(), 'd_model_interrupt.t7') log_file.close() exit() log_file.close() return self.X_AE def get_batch_data_fast_new(self, emb_en, emb_it): params = self.params random_en_indices = torch.LongTensor(params.mini_batch_size).random_( params.most_frequent_sampling_size) random_it_indices = torch.LongTensor(params.mini_batch_size).random_( params.most_frequent_sampling_size) #print(random_en_indices) #print(random_it_indices) en_batch = to_variable(emb_en)[random_en_indices.cuda()] it_batch = to_variable(emb_it)[random_it_indices.cuda()] return en_batch, it_batch def export(self, src_dico, tgt_dico, emb_en, emb_it, seed, export_emb=False): params = _get_eval_params(self.params) eval = Evaluator(params, emb_en, emb_it, torch.cuda.is_available()) # Export adversarial dictionaries optim_X_AE = AE(params).cuda() optim_Y_AE = AE(params).cuda() print('Loading pre-trained models...') optim_X_AE.load_state_dict( torch.load(self.tune_dir + '/best/seed_{}_dico_{}_best_X.t7'.format( seed, params.dico_build))) optim_Y_AE.load_state_dict( torch.load(self.tune_dir + '/best/seed_{}_dico_{}_best_Y.t7'.format( seed, params.dico_build))) X_Z = optim_X_AE.encode(Variable(emb_en)).data Y_Z = optim_Y_AE.encode(Variable(emb_it)).data mstart_time = timer() for method in ['nn', 'csls_knn_10']: results = get_word_translation_accuracy(params.src_lang, src_dico[1], X_Z, params.tgt_lang, tgt_dico[1], emb_it, 
method=method, dico_eval=self.eval_file, device=params.cuda_device) acc1 = results[0][1] results = get_word_translation_accuracy(params.tgt_lang, tgt_dico[1], Y_Z, params.src_lang, src_dico[1], emb_en, method=method, dico_eval=self.eval_file2, device=params.cuda_device) acc2 = results[0][1] # csls = 0 print('{} takes {:.2f}s'.format(method, timer() - mstart_time)) print('Method:{} score:{:.4f}-{:.4f}'.format(method, acc1, acc2)) f_csls = eval.dist_mean_cosine(X_Z, emb_it) b_csls = eval.dist_mean_cosine(Y_Z, emb_en) csls = (f_csls + b_csls) / 2.0 print("Seed:{},ACC:{:.4f}-{:.4f},CSLS_FB:{:.6f}".format( seed, acc1, acc2, csls)) #''' print('Building dictionaries...') params.dico_build = "S2T&T2S" params.dico_method = "csls_knn_10" X_Z = X_Z / X_Z.norm(2, 1, keepdim=True).expand_as(X_Z) emb_it = emb_it / emb_it.norm(2, 1, keepdim=True).expand_as(emb_it) f_dico_induce = build_dictionary(X_Z, emb_it, params) f_dico_induce = f_dico_induce.cpu().numpy() Y_Z = Y_Z / Y_Z.norm(2, 1, keepdim=True).expand_as(Y_Z) emb_en = emb_en / emb_en.norm(2, 1, keepdim=True).expand_as(emb_en) b_dico_induce = build_dictionary(Y_Z, emb_en, params) b_dico_induce = b_dico_induce.cpu().numpy() f_dico_set = set([(a, b) for a, b in f_dico_induce]) b_dico_set = set([(b, a) for a, b in b_dico_induce]) intersect = list(f_dico_set & b_dico_set) union = list(f_dico_set | b_dico_set) with io.open( self.tune_dir + '/export/{}-{}.dict'.format(params.src_lang, params.tgt_lang), 'w', encoding='utf-8', newline='\n') as f: for item in f_dico_induce: f.write('{} {}\n'.format(src_dico[0][item[0]], tgt_dico[0][item[1]])) with io.open( self.tune_dir + '/export/{}-{}.dict'.format(params.tgt_lang, params.src_lang), 'w', encoding='utf-8', newline='\n') as f: for item in b_dico_induce: f.write('{} {}\n'.format(tgt_dico[0][item[0]], src_dico[0][item[1]])) with io.open(self.tune_dir + '/export/{}-{}.intersect'.format( params.src_lang, params.tgt_lang), 'w', encoding='utf-8', newline='\n') as f: for item in intersect: 
f.write('{} {}\n'.format(src_dico[0][item[0]], tgt_dico[0][item[1]])) with io.open(self.tune_dir + '/export/{}-{}.intersect'.format( params.tgt_lang, params.src_lang), 'w', encoding='utf-8', newline='\n') as f: for item in intersect: f.write('{} {}\n'.format(tgt_dico[0][item[1]], src_dico[0][item[0]])) with io.open( self.tune_dir + '/export/{}-{}.union'.format(params.src_lang, params.tgt_lang), 'w', encoding='utf-8', newline='\n') as f: for item in union: f.write('{} {}\n'.format(src_dico[0][item[0]], tgt_dico[0][item[1]])) with io.open( self.tune_dir + '/export/{}-{}.union'.format(params.tgt_lang, params.src_lang), 'w', encoding='utf-8', newline='\n') as f: for item in union: f.write('{} {}\n'.format(tgt_dico[0][item[1]], src_dico[0][item[0]])) if export_emb: print('Exporting {}-{}.{}'.format(params.src_lang, params.tgt_lang, params.src_lang)) loader.export_embeddings( src_dico[0], X_Z, path=self.tune_dir + '/export/{}-{}.{}'.format( params.src_lang, params.tgt_lang, params.src_lang), eformat='txt') print('Exporting {}-{}.{}'.format(params.src_lang, params.tgt_lang, params.tgt_lang)) loader.export_embeddings( tgt_dico[0], emb_it, path=self.tune_dir + '/export/{}-{}.{}'.format( params.src_lang, params.tgt_lang, params.tgt_lang), eformat='txt') print('Exporting {}-{}.{}'.format(params.tgt_lang, params.src_lang, params.tgt_lang)) loader.export_embeddings( tgt_dico[0], Y_Z, path=self.tune_dir + '/export/{}-{}.{}'.format( params.tgt_lang, params.src_lang, params.tgt_lang), eformat='txt') print('Exporting {}-{}.{}'.format(params.tgt_lang, params.src_lang, params.src_lang)) loader.export_embeddings( src_dico[0], emb_en, path=self.tune_dir + '/export/{}-{}.{}'.format( params.tgt_lang, params.src_lang, params.src_lang), eformat='txt')
def train(*,
          folder=None,
          dataset='mnist',
          patch_size=8,
          resume=False,
          log_interval=1,
          device='cpu',
          objective='vae',
          batch_size=64,
          nz=100,
          lr=0.001,
          num_workers=1,
          nb_filters=64,
          nb_draw_layers=1):
    """Train a (V)AE on patches extracted from a dataset.

    Keyword-only parameters:
        folder: output directory for checkpoints/images; defaults to
            ``results/{dataset}/{patch_size}x{patch_size}``.
        dataset: dataset name passed to ``load_dataset``.
        patch_size: side length of square patches; if None the raw
            images are used unpatched.
        resume: if True, reload ``{folder}/net.th`` instead of building
            a fresh model.
        log_interval: print the loss every ``log_interval`` iterations.
        device: torch device string ('cpu' or 'cuda...').
        objective: loss objective forwarded to the AE model.
        batch_size, nz, lr, num_workers, nb_filters: usual knobs.
        nb_draw_layers: kept for interface compatibility; not used here.

    Side effects: writes PNG sample grids and the whole-model
    checkpoint ``net.th`` into ``folder``.
    """
    if folder is None:
        folder = f'results/{dataset}/{patch_size}x{patch_size}'
    # exist_ok replaces the broad try/except that silently hid
    # real filesystem errors (permissions, read-only mounts, ...).
    os.makedirs(folder, exist_ok=True)

    act = 'sigmoid'
    nb_epochs = 3000

    dataset = load_dataset(dataset, split='train')
    if patch_size is not None:
        patch_size = int(patch_size)
        dataset = PatchDataset(dataset, patch_size)
    # Infer the channel count from the first sample.
    x0, _ = dataset[0]
    nc = x0.size(0)
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )

    if resume:
        # map_location keeps resume working when the checkpoint was
        # saved on a different device (e.g. GPU save, CPU resume).
        net = torch.load('{}/net.th'.format(folder), map_location=device)
    else:
        net = AE(
            latent_size=nz,
            nc=nc,
            w=patch_size,
            ndf=nb_filters,
            act=act,
            objective=objective,
        )
    # Move the model before building the optimizer so the optimizer
    # always sees the on-device parameters.
    net = net.to(device)
    opt = optim.Adam(net.parameters(), lr=lr)

    niter = 0
    for epoch in range(nb_epochs):
        for X, _ in dataloader:
            net.zero_grad()
            X = X.to(device)
            Xrec, mu, logvar = net(X)
            rec, kld = net.loss_function(X, Xrec, mu, logvar)
            loss = rec + kld
            loss.backward()
            opt.step()
            if niter % log_interval == 0:
                print(
                    f'Epoch: {epoch:05d}/{nb_epochs:05d} iter: {niter:05d} loss: {loss.item():.2f} rec: {rec.item():.2f} kld:{kld.item():.2f}'
                )
            if niter % 100 == 0:
                Xsamples = net.sample(nb_examples=100)
                # tanh outputs live in [-1, 1]; rescale to [0, 1] for saving.
                X = 0.5 * (X + 1) if act == 'tanh' else X
                Xrecs = 0.5 * (Xrec + 1) if act == 'tanh' else Xrec
                Xsamples = 0.5 * (Xsamples + 1) if act == 'tanh' else Xsamples
                X = X.detach().to('cpu').numpy()
                Xrecs = Xrecs.detach().to('cpu').numpy()
                Xsamples = Xsamples.detach().to('cpu').numpy()
                imsave(f'{folder}/real_samples.png', grid_of_images_default(X))
                imsave(f'{folder}/rec_samples.png', grid_of_images_default(Xrecs))
                imsave(f'{folder}/fake_samples.png', grid_of_images_default(Xsamples))
                # NOTE(review): saves the whole module object; loading
                # requires the class definition to be importable.
                torch.save(net, '{}/net.th'.format(folder))
            niter += 1
# Toy AE training script: picks a small set of training locations out of a
# d*d uncertainty grid and fits an autoencoder to them.
# Assumes `d`, `uncertainty_data`, and the `AE` class are defined earlier
# in the file — TODO confirm their definitions match this usage.

# Random offset so the hand-crafted training line is not always in the
# same place on the grid.
z = random.randint(1, 10)
ind_x = list(range(z + 80, z + 90, 1))
# ind_y = [int((a-50)*(a-50)*0.2 + a + 1) for a in ind_x]
# Line-shaped selection: y = -x + 101.
ind_y = [int(-1 * a + 101) for a in ind_x]
# Change these things to make shape of training data
# Flatten (x, y) grid coordinates into row indices of the d*d table.
index = [a + b * d for (a, b) in zip(ind_x, ind_y)]
print(ind_x, ind_y, index)
# index = torch.randperm(d*d)[:20] --> If you want to select random location for training data
# NOTE(review): this overwrites the line-shaped `index` above with 16
# random locations — the two selection strategies appear to be manual
# toggles; only the random one is active here.
index = torch.randperm(d * d)[:16]
uncertainty_data = uncertainty_data.reshape((d * d, 2))
train_data = uncertainty_data[index, :]

### Model and optimizer initialization
model = AE()
# NOTE(review): the name `optim` shadows torch.optim if it was imported
# under that name earlier in the file.
optim = torch.optim.Adam(model.parameters())
criterion = torch.nn.MSELoss()

### Training
for i in range(1500):  # change 1000 to 0 if you want to see not-trained version.
    # Reshuffle the training rows each epoch, then train on mini-batches of 4.
    td = train_data[torch.randperm(train_data.size(0))]
    for batch in td.split(4, dim=0):
        out = model(batch)  # train_data
        loss = criterion(out, batch)  # train_data
        optim.zero_grad()
        loss.backward()
        optim.step()
    # Log the last mini-batch loss every 500 epochs.
    if i % 500 == 0:
        print(i, loss.item())