Example #1
import math
import sys
import time

import torch

# model, optimizer, criterion, train_data, bptt, ntokens and epoch are
# assumed to be module-level globals, as in the PyTorch language-modeling
# tutorial these snippets follow.


def train():
    model.train()  # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
        data, targets = get_batch(train_data, i, bptt)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        # Clip gradients to a max norm of 0.5 to prevent them from exploding
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        total_loss += loss.item()
        log_interval = 200
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | '
                  'ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} |'.format(
                      epoch,
                      batch,
                      len(train_data) // bptt,
                      elapsed * 1000 / log_interval,
                      cur_loss,
                      math.exp(cur_loss)))
            sys.stdout.flush()
            total_loss = 0
            start_time = time.time()
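
Example #1 assumes a get_batch helper that slices a (data, target) pair out of the batched corpus. A minimal sketch, following the PyTorch language-modeling tutorial these snippets are based on (source is a tensor shaped like train_data above):

def get_batch(source, i, bptt):
    # Take a chunk of up to bptt rows starting at i; the target is the
    # same chunk shifted one token ahead, flattened for the loss.
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len].reshape(-1)
    return data, target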
Example #2
def evaluate(eval_model, data_source):
    eval_model.eval()  # Turn on the evaluation mode
    total_loss = 0.
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            data, targets = get_batch(data_source, i, bptt)
            output = eval_model(data)
            output_flat = output.view(-1, ntokens)
            # Weight each batch's loss by its length, since the final
            # batch can be shorter than bptt
            total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)  # mean loss per token
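
train() and evaluate() are normally driven by an outer epoch loop. A minimal sketch, assuming val_data, epochs, and a learning-rate scheduler exist as in the PyTorch tutorial:

best_val_loss = float('inf')
for epoch in range(1, epochs + 1):
    train()
    val_loss = evaluate(model, val_data)
    print('| end of epoch {:3d} | valid loss {:5.2f} | '
          'valid ppl {:8.2f} |'.format(epoch, val_loss, math.exp(val_loss)))
    if val_loss < best_val_loss:
        best_val_loss = val_loss  # remember the best validation score
        best_model = model
    scheduler.step()  # decay the learning rate once per epoch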
Example #3
import math
import time

import numpy as np
import torch


def train():
    model.train()  # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    ntokens = len(TEXT.vocab.stoi)  # vocabulary size from the torchtext field

    times = []
    for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
        data, targets = get_batch(train_data, i, bptt)
        optimizer.zero_grad()
        start_time2 = time.time()  # time forward/backward/step only
        output = model(data)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        end_time2 = time.time()
        times.append(end_time2 - start_time2)

        total_loss += loss.item()
        log_interval = 200

        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | '
                  'ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | '
                  'scaled ppl {:8.2f} |'.format(
                      epoch,
                      batch,
                      len(train_data) // bptt,
                      elapsed * 1000 / log_interval,
                      cur_loss,
                      math.exp(cur_loss),
                      # perplexity rescaled by emsize / bptt
                      math.exp(cur_loss * emsize / bptt)))
            total_loss = 0
            start_time = time.time()
            print('average time:', np.mean(times))
            times = []
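
The per-step timing in Example #3 can under-report GPU time: CUDA kernels launch asynchronously, so time.time() may return before the step has actually finished. A minimal sketch of a synchronized timer (the timed helper is hypothetical, not part of the original code):

import time

import torch

def timed(fn, *args):
    # Synchronize around the call so the wall-clock delta measures
    # completed GPU work rather than just kernel launches.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    start = time.time()
    result = fn(*args)
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return result, time.time() - start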