def __init__(self):
        """Build the seq2seq Transformer model and its padding-aware loss.

        ``prepare_data_own`` must run first: it is expected to populate
        ``self.src_vocab_size``, ``self.trg_vocab_size``,
        ``self.src_pad_idx`` and ``self.pad_idx``, which are read below.
        """
        super().__init__()

        embedding_size = 512
        # Fall back to CPU when CUDA is unavailable instead of crashing
        # (the original hard-coded "cuda"); this matches the device
        # selection used by the other examples in this file.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # Must precede model construction: defines vocab sizes / pad ids.
        self.prepare_data_own()
        self.model = Transformer(device, embedding_size, self.src_vocab_size,
                                 self.trg_vocab_size,
                                 self.src_pad_idx).to(device)

        # Padding positions are ignored so they contribute no gradient.
        self.loss = nn.CrossEntropyLoss(ignore_index=self.pad_idx)
# ===== 示例 #2 (Example 2) =====
from train import train
# NOTE(review): module name "transfomer" is misspelled ("transformer") —
# presumably the project file really is named transfomer.py; confirm
# before renaming anything.
from transfomer import Transformer

# German->English translation data; getData() is defined elsewhere.
german, english, train_data, valid_data, test_data = getData()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 32

# Model hyperparameters
src_vocab_size = len(german.vocab)
trg_vocab_size = len(english.vocab)
embedding_size = 512
# NOTE(review): despite the name, this looks up the "<sos>" token rather
# than "<pad>" (a later example uses stoi["<pad>"]), and it is taken from
# the *English* vocab although it pads the German source — looks like a
# bug; verify against Transformer's masking logic before changing.
src_pad_idx = english.vocab.stoi["<sos>"]
print(src_pad_idx)
print(english.vocab.itos[src_pad_idx])

model = Transformer(device, embedding_size, src_vocab_size, trg_vocab_size,
                    src_pad_idx).to(device)

load_model = True
save_model = True
learning_rate = 3e-4

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Resume from a previously saved checkpoint when requested.
if load_model:
    load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

# sentence = "ein pferd geht unter einer brücke neben einem boot."
#
# translated_sentence = translate_sentence(
#     model, sentence, german, english, device, max_length=50
# )
# ===== 示例 #3 (Example 3) =====
    print("orig", xv)
    v = out[i].numpy()
    print("[", end="")
    for index in v:
      print(xv[index] + ", ", end="")

    print("]")
  """


# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"  # small word-sort demo; runs fine on CPU
embedding_size = 6
src_pad_idx = 2

# Transformer used as a pointer network: vocab sizes are omitted here,
# so presumably they default inside Transformer — TODO confirm.
ptrNet = Transformer(device, embedding_size,
                     src_pad_idx=src_pad_idx).to(device)

# ptrNet = PointerNetwork(config.HIDDEN_SIZE)
optimizer = optim.Adam(ptrNet.parameters(), lr=0.01)

program_starts = time.time()
for epoch in range(EPOCHS):

    # NOTE(review): evaluateWordSort runs both before and after each
    # training epoch, so every model state is evaluated twice — possibly
    # an intentional pre/post-training comparison; confirm.
    evaluateWordSort(ptrNet, epoch + 1)

    train(ptrNet, optimizer, epoch + 1)
    evaluateWordSort(ptrNet, epoch + 1)

now = time.time()
print("It has been {0} seconds since the loop started".format(now -
                                                              program_starts))
# ===== 示例 #4 (Example 4) =====
    trg = example.trg
    print(">> ", src)
    print("   ", trg)
# exit()

src_vocab_size = len(german_vocab)
trg_vocab_size = len(english_vocab)
print("src vocabulary size: ", src_vocab_size)
print("trg vocabulary size: ", trg_vocab_size)
embedding_size = 512
# NOTE(review): the *source*-side pad index is taken from the English
# (target) vocab; harmless only if both vocabs assign "<pad>" the same
# id — confirm.
src_pad_idx = english_vocab.stoi["<pad>"]
print(src_pad_idx)
print(english_vocab.itos[src_pad_idx])
print("===============================after loading ")

# Variant with an extra syntax embedding ("END" architecture flag);
# semantics of arch_flag are defined in the Transformer class elsewhere.
model = Transformer(device, embedding_size, src_vocab_size, trg_vocab_size, src_pad_idx,
                    arch_flag = "END", syntax_embedding_size = 256).to(device)

load_model = False
save_model = True
learning_rate = 3e-4

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Resume from a previously saved checkpoint when requested.
if load_model:
    load_checkpoint(torch.load("RSWI_checkpoint.pth.tar"), model, optimizer)

# sentence = "ein pferd geht unter einer brücke neben einem boot."
#
# translated_sentence = translate_sentence(
#     model, sentence, german, english, device, max_length=50
# )
# ===== 示例 #5 (Example 5) =====
# data = train_data[0:3]
# for example in data:
#     src = example.src
#     trg = example.trg
#     print(">>>> ", src)
#     print(spe_dec.decode(src))
#     print("     ", trg)
#     print(spe_dec.decode(trg))

# spe_dec appears to be a SentencePiece processor shared by source and
# target, hence identical vocab sizes on both sides.
src_vocab_size = len(spe_dec)
trg_vocab_size = len(spe_dec)
print("src vocabulary size: ", src_vocab_size)
print("trg vocabulary size: ", trg_vocab_size)
embedding_size = 256
src_pad_idx = spe_dec.pad_id()
print("pad_index = ", src_pad_idx)
print("===============================after loading")

model = Transformer(device, embedding_size, src_vocab_size, trg_vocab_size,
                    src_pad_idx).to(device)

load_model = False
save_model = True
learning_rate = 3e-4
# batch_size
batch_size = 32
# NOTE(review): true division yields a float; if train() iterates
# range(num_batches) this breaks — probably wants
# len(train_data.examples) // batch_size or math.ceil(...). Confirm
# against train()'s signature before changing.
num_batches = len(train_data.examples) / batch_size
train(num_batches, learning_rate, model, device, load_model, save_model,
      spe_dec, spe_dec, train_data, valid_data, test_data, batch_size)
# running on entire test data takes a while