# Example #1 (header left over from the code-example listing; listed score: 0)
from data import Dataset
from helpers import calc_acc, ep_time, get_config, get_pretrained_embedding, init_params

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hand get_config the path of the config.yml that sits in the same directory
# as this script, so the lookup does not depend on the working directory.
config = get_config(
    os.path.join(os.path.abspath(os.path.dirname(__file__)), "config.yml"))
# Seed both PyTorch's and Python's random number generators with the
# configured seed so runs start from the same random state.
torch.manual_seed(config["seed"])
random.seed(config["seed"])
# Request deterministic cuDNN algorithms and disable the cuDNN benchmark
# auto-tuner, which may otherwise pick different algorithms between runs.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Location of the model's ".pt" file:
# <current working directory>/models/<model_name>.pt
# NOTE(review): this is resolved against os.getcwd(), whereas config.yml is
# located relative to this script's directory - confirm that is intended.
MODEL_PATH = os.path.join(os.getcwd(), "models", config["model_name"] + ".pt")

# Build the project's Dataset helper from the configured max_len, max_size,
# batch_size and pad_token values (passed positionally, in that order).
dataloader = Dataset(config["max_len"], config["max_size"],
                     config["batch_size"], config["pad_token"])
# NOTE(review): the result is unpacked as (train, test, valid) - confirm this
# matches the order in which get_iterator() returns the splits.
train_iterator, test_iterator, valid_iterator = dataloader.get_iterator()
print("Loaded iterator, generating vocab...")
# The vocabulary and tokenizer are obtained from the same Dataset helper.
vocab = dataloader.get_vocab()
tokenizer = dataloader.get_tokenizer()

# Vocabulary entry for the configured padding token (presumably its integer
# index - TODO confirm against the vocab type).
pad_idx = vocab[config["pad_token"]]
# Number of entries in the vocabulary; presumably used as the model's input
# dimension - TODO confirm against the model definition.
input_dim = len(vocab)


def train(model, iterator, optimizer, criterion):
    ep_loss, ep_acc = 0, 0

    model.train()

    for labels, text, lengths in iterator:
        labels, text = labels.to(device), text.to(device)