def __init__(self):
    """Build the seq2seq Transformer model and its padding-aware loss.

    Relies on self.prepare_data_own() to populate the attributes read
    below (self.src_vocab_size, self.trg_vocab_size, self.src_pad_idx,
    self.pad_idx) before the model is constructed.
    """
    super().__init__()
    # change things as required
    embedding_size = 512
    # BUGFIX: fall back to CPU when CUDA is unavailable instead of
    # hardcoding "cuda" (matches the device selection used elsewhere
    # in this project).
    device = "cuda" if torch.cuda.is_available() else "cpu"
    self.prepare_data_own()
    self.model = Transformer(
        device,
        embedding_size,
        self.src_vocab_size,
        self.trg_vocab_size,
        self.src_pad_idx,
    ).to(device)
    # pad_idx = english_vocab.stoi["<pad>"]
    # criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
    # Ignore padding positions when computing the token-level loss.
    self.loss = nn.CrossEntropyLoss(ignore_index=self.pad_idx)
from train import train
# NOTE(review): module name looks misspelled ("transfomer") — confirm it
# matches the actual file name on disk before "fixing" it.
from transfomer import Transformer

german, english, train_data, valid_data, test_data = getData()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

batch_size = 32

# Model hyperparameters
src_vocab_size = len(german.vocab)
trg_vocab_size = len(english.vocab)
embedding_size = 512
# BUGFIX: this value is used as the model's *padding* index, so look up the
# "<pad>" token, not "<sos>" — the sibling setup scripts in this project all
# use english vocab's "<pad>" here.
src_pad_idx = english.vocab.stoi["<pad>"]
print(src_pad_idx)
print(english.vocab.itos[src_pad_idx])

model = Transformer(
    device, embedding_size, src_vocab_size, trg_vocab_size, src_pad_idx
).to(device)

load_model = True
save_model = True
learning_rate = 3e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

if load_model:
    load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

# sentence = "ein pferd geht unter einer brücke neben einem boot."
#
# translated_sentence = translate_sentence(
#     model, sentence, german, english, device, max_length=50
# )
print("orig", xv)
v = out[i].numpy()
print("[", end="")
for index in v:
    print(xv[index] + ", ", end="")
print("]")
"""

# NOTE(review): everything above the closing triple-quote is the tail of a
# multi-line string whose opening quote is outside this chunk — its internal
# line breaks are reconstructed and not behavior-relevant only if the string
# is a bare expression; verify against the full file.

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"  # pointer-net experiment deliberately runs on CPU
embedding_size = 6
src_pad_idx = 2  # hardcoded pad index for the toy word-sort vocabulary
ptrNet = Transformer(device, embedding_size, src_pad_idx=src_pad_idx).to(device)
# ptrNet = PointerNetwork(config.HIDDEN_SIZE)
optimizer = optim.Adam(ptrNet.parameters(), lr=0.01)

# Time the evaluate/train loop end to end.
program_starts = time.time()
# NOTE(review): loop nesting reconstructed from mangled source — assumed the
# per-epoch body is evaluate-then-train, with one final evaluation after the
# loop; confirm against the original file's indentation.
for epoch in range(EPOCHS):
    evaluateWordSort(ptrNet, epoch + 1)
    train(ptrNet, optimizer, epoch + 1)
evaluateWordSort(ptrNet, epoch + 1)
now = time.time()
print("It has been {0} seconds since the loop started".format(now - program_starts))
# NOTE(review): the first three statements reference `example` and `src`,
# so they appear to be the tail of an example-printing loop whose header is
# outside this chunk — indentation reconstructed as top-level; verify against
# the surrounding file.
trg = example.trg
print(">> ", src)
print("   ", trg)
# exit()

src_vocab_size = len(german_vocab)
trg_vocab_size = len(english_vocab)
print("src vocabulary size: ", src_vocab_size)
print("trg vocabulary size: ", trg_vocab_size)

embedding_size = 512
# NOTE(review): the pad index is taken from the *English* vocab although it
# is named src_pad_idx (source side is German) — presumably both vocabs map
# "<pad>" to the same index; confirm.
src_pad_idx = english_vocab.stoi["<pad>"]
print(src_pad_idx)
print(english_vocab.itos[src_pad_idx])
print("===============================after loading ")

# Variant model with the syntax-aware "END" architecture flag and a separate
# syntax embedding size.
model = Transformer(device, embedding_size, src_vocab_size, trg_vocab_size,
                    src_pad_idx, arch_flag = "END",
                    syntax_embedding_size = 256).to(device)

load_model = False
save_model = True
learning_rate = 3e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

if load_model:
    load_checkpoint(torch.load("RSWI_checkpoint.pth.tar"), model, optimizer)

# sentence = "ein pferd geht unter einer brücke neben einem boot."
#
# translated_sentence = translate_sentence(
#     model, sentence, german, english, device, max_length=50
# )
# data = train_data[0:3]
# for example in data:
#     src = example.src
#     trg = example.trg
#     print(">>>> ", src)
#     print(spe_dec.decode(src))
#     print("     ", trg)
#     print(spe_dec.decode(trg))

# Shared SentencePiece vocabulary is used for both source and target sides.
src_vocab_size = len(spe_dec)
trg_vocab_size = len(spe_dec)
print("src vocabulary size: ", src_vocab_size)
print("trg vocabulary size: ", trg_vocab_size)

embedding_size = 256
src_pad_idx = spe_dec.pad_id()
print("pad_index = ", src_pad_idx)
print("===============================after loading")

model = Transformer(
    device, embedding_size, src_vocab_size, trg_vocab_size, src_pad_idx
).to(device)

load_model = False
save_model = True
learning_rate = 3e-4

# batch_size
batch_size = 32
# BUGFIX: use floor division — the original true division (`/`) produced a
# float batch count, which breaks anything that iterates range(num_batches).
num_batches = len(train_data.examples) // batch_size

train(num_batches, learning_rate, model, device, load_model, save_model,
      spe_dec, spe_dec, train_data, valid_data, test_data, batch_size)
# running on entire test data takes a while