Example #1
def run_epoch(model, data, is_train=False, lr=1.0):
  """Runs the model on the given data."""
  if is_train:
    model.train()
  else:
    model.eval()
  epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
  start_time = time.time()
  hidden = model.init_hidden()
  costs = 0.0
  iters = 0
  for step, (x, y) in enumerate(reader.ptb_iterator(data, model.batch_size, model.num_steps)):
    inputs = Variable(torch.from_numpy(x.astype(np.int64)).transpose(0, 1).contiguous()).cuda()
    model.zero_grad()
    hidden = repackage_hidden(hidden)
    outputs, hidden = model(inputs, hidden)
    targets = Variable(torch.from_numpy(y.astype(np.int64)).transpose(0, 1).contiguous()).cuda()
    tt = torch.squeeze(targets.view(-1, model.batch_size * model.num_steps))

    loss = criterion(outputs.view(-1, model.vocab_size), tt)
    costs += loss.data[0] * model.num_steps
    iters += model.num_steps

    if is_train:
      loss.backward()
      torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
      for p in model.parameters():
        p.data.add_(-lr, p.grad.data)
      if step % (epoch_size // 10) == 10:
        print("{} perplexity: {:8.2f} speed: {} wps".format(step * 1.0 / epoch_size, np.exp(costs / iters),
                                                       iters * model.batch_size / (time.time() - start_time)))
  return np.exp(costs / iters)
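The snippets on this page all call a repackage_hidden helper that none of them define. A minimal sketch of the usual version (an assumption based on the common word-language-model pattern, not taken from these repositories): it detaches the hidden state so backpropagation is truncated at batch boundaries.

import torch

def repackage_hidden(h):
    """Detach hidden state(s) from the graph of the previous batch (truncated BPTT)."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)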
Example #2
def run_epoch(model, data, is_train=False, lr=1.0, device=torch.device('cpu')):
    """Runs the model on the given data."""
    if is_train:
        model.train()
    else:
        model.eval()
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    hidden = model.init_hidden()
    costs = 0.0
    iters = 0
    for step, (x, y) in enumerate(reader.ptb_iterator(data, model.batch_size, model.num_steps, model.direction)):
        # x holds the input token ids for the LSTM and y the expected (next-word) output ids
        inputs = Variable(torch.from_numpy(x.astype(np.int64)).transpose(0, 1).contiguous()).to(device)
        model.zero_grad()
        hidden = repackage_hidden(hidden)
        outputs, hidden = model.forward(inputs=inputs, hidden=hidden)
        targets = Variable(torch.from_numpy(y.astype(np.int64)).transpose(0, 1).contiguous()).to(device)  # Transposes and puts the target words in a tensor
        tt = torch.squeeze(targets.view(-1, model.batch_size * model.num_steps))

        loss = criterion(outputs.view(-1, model.vocab_size), tt)  # Computes the cross entropy loss
        costs += loss.item() * model.num_steps  # was loss.data[0]; accumulates the loss across iterations
        iters += model.num_steps

        if is_train:
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
            for p in model.parameters():
                p.data.add_(-lr, p.grad.data)
            if step % (epoch_size // 10) == 10:
                print("{} perplexity: {:8.2f} speed: {} wps".format(step * 1.0 / epoch_size, np.exp(costs / iters),
                                                                    iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
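In Examples #1 and #2 the parameter update is a hand-rolled SGD step: clip the gradients, then p.data.add_(-lr, p.grad.data). Under that reading, the training branch could equivalently be written with an explicit optimizer (a sketch, not the authors' code; criterion is assumed to be a cross-entropy loss over the flattened logits):

import torch

def training_step(model, criterion, optimizer, outputs, tt, clip=0.25):
    """One update equivalent to the manual loop above, using torch.optim."""
    loss = criterion(outputs.view(-1, model.vocab_size), tt)
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), clip)  # current in-place clip API
    optimizer.step()
    return loss.item()

# optimizer = torch.optim.SGD(model.parameters(), lr=lr) reproduces p.data.add_(-lr, p.grad.data)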
Example #3
def run_prediction(model, my_words, inputs, device=torch.device('cpu')):
    """
    Runs prediction on a single query sequence.  Returns the predicted word, top hidden layer and top cell state.
    :param model: Container for the LSTM models
    :param my_words: Container for converting between words and word_ids
    :param inputs: A single query sequence as a list of word ids
    :param device: The torch device to run on (CPU or CUDA GPU)
    :return:
        last_word: string: the predicted next word
        h_f: torch tensor: The last top level hidden layer
        c_f: torch tensor: The last top-level cell state.  This likely gives the best vectorization
    """

    working = np.array(inputs)
    if model.direction == 'backward':
        working = np.flip(working)
    working = working.reshape(1, -1)
    working = Variable(torch.from_numpy(working.astype(np.int64)).transpose(0, 1).contiguous()).to(device)

    model.eval()
    num_steps = len(inputs)
    batch_size = 1

    hidden = model.init_hidden(batch_size=batch_size)
    hidden = repackage_hidden(hidden)
    # output, ((num_layers * num_directions, batch, hidden_size), (num_layers * num_directions, batch, hidden_size))
    output, (h_n, c_n) = model.forward(inputs=working, hidden=hidden, num_steps=num_steps, batch_size=batch_size)
    # Change to (num_layers, num_directions, batch, hidden_size)
    h_n = h_n.view(model.num_layers, model.num_directions, batch_size, model.hidden_dim)
    h_n = torch.transpose(h_n, 1, 2)  # change to (num_layers, batch, num_directions, hidden_size)
    # Change to (num_layers, num_directions, batch, hidden_size)
    c_n = c_n.view(model.num_layers, model.num_directions, batch_size, model.hidden_dim)
    c_n = torch.transpose(c_n, 1, 2)  # change to (num_layers, batch, num_directions, hidden_size)

    # Pull out last word
    last_word = output[-1, 0]
    last_word = torch.argmax(last_word).tolist()
    if not isinstance(last_word, str):
        last_word = [last_word]
    last_word = my_words.word_ids_to_words(last_word)[0]  # Get the last word list then take the 0 index

    # Pull out the top final h_f and c_f
    h_f = h_n[model.lstm.num_layers - 1].view(-1)
    c_f = c_n[model.lstm.num_layers - 1].view(-1)

    return last_word, h_f, c_f
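A hypothetical call site for run_prediction (the query and the words_to_word_ids helper are assumptions mirroring the my_words container described in the docstring):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
query_ids = my_words.words_to_word_ids(['the', 'quick', 'brown'])  # hypothetical inverse of word_ids_to_words
next_word, h_f, c_f = run_prediction(model, my_words, query_ids, device=device)
print(next_word)   # predicted next word as a string
print(c_f.shape)   # flattened top-layer cell state, usable as a sequence embedding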
Example #4
def run_epoch(model, data, is_train=False, lr=1.0, prt_out=False):
    """Runs the model on the given data."""
    model.train()

    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    hidden = model.init_hidden()
    costs = 0.0
    iters = 0

    for step, (x, y) in enumerate(train_iter):
        if len(x) < model.num_steps:
            continue
        inputs = Variable(x.contiguous()).cuda()
        # print("inputs", inputs)

        targets = Variable(y.contiguous()).cuda()
        # print("targets", targets.size())
        model.zero_grad()
        hidden = repackage_hidden(hidden)
        outputs, hidden = model(inputs, hidden)
        tt = torch.squeeze(targets.view(-1,
                                        model.batch_size * model.num_steps))
        if prt_out:
            for o in outputs:
                for w in o:
                    val, idx = torch.max(w, 0)
                    # print(m)
                    print(TEXT.vocab.itos[idx.data[0]], end=" ")
                print()
        loss = criterion(outputs.view(-1, model.vocab_size), tt)
        costs += loss.data[0] * model.num_steps
        iters += model.num_steps

        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)

        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)
        if step % 30 == 0:
            print("{} perplexity: {:8.2f} speed: {} wps".format(
                step * 1.0 / epoch_size, np.exp(costs / iters),
                iters * model.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)
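This variant reads batches from a module-level train_iter and maps ids back to words through TEXT.vocab.itos, neither of which is shown. One plausible setup, assuming the legacy torchtext (< 0.9) API; the dataset and batch sizes here are guesses, not taken from the source repository:

import torch
from torchtext import data, datasets  # legacy torchtext API

TEXT = data.Field(lower=True)
train, valid, test = datasets.PennTreebank.splits(TEXT)
TEXT.build_vocab(train)
train_iter, valid_iter, test_iter = data.BPTTIterator.splits(
    (train, valid, test), batch_size=20, bptt_len=35, device=torch.device('cuda'))
# BPTTIterator yields Batch objects, while the loop above unpacks (x, y) pairs,
# so the original presumably wraps the iterator, e.g.:
# train_iter = ((batch.text, batch.target) for batch in train_iter)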
Example #5
def run_epoch(model, data, optimizer, is_train=False):
    """Runs one epoch on the give data."""
    if is_train:
        model.train()
    else:
        model.eval()
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    hidden = model.init_hidden()
    costs = 0.0
    iters = 0
    data_iterator = reader.ptb_iterator(data, model.batch_size,
                                        model.num_steps)
    for step, (x, y) in enumerate(data_iterator):
        inputs = Variable(
            torch.from_numpy(x.astype(np.int64)).transpose(
                0, 1).contiguous()).cuda()
        targets = Variable(
            torch.from_numpy(y.astype(np.int64)).transpose(
                0, 1).contiguous()).cuda()
        tt = torch.squeeze(targets.view(-1,
                                        model.batch_size * model.num_steps))
        optimizer.zero_grad()
        hidden = repackage_hidden(hidden)
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs.view(-1, model.vocab_size), tt)
        costs += loss.data[0] * model.num_steps
        iters += model.num_steps
        # Report perplexity for PTB or BPC otherwise
        metric = "perplexity" if args.data_set == "ptb" else "bpc"
        perf = np.exp(
            costs /
            iters) if args.data_set == "ptb" else 1.4427 * (costs / iters)
        if is_train:
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), args.grad_clip)
            optimizer.step()
            if step % (epoch_size // 10) == 10:
                wps = iters * model.batch_size / (time.time() - start_time)
                print("{} : {} {:8.4f} speed: {} wps".format(
                    step * 1.0 / epoch_size, metric, perf, wps))
    return perf
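The 1.4427 constant is 1/ln 2 (i.e. log2 e): it converts the accumulated cross-entropy from nats to bits, giving bits-per-character for non-PTB data, while perplexity exponentiates the same average. A small standalone helper illustrating the two metrics (the function name is illustrative):

import numpy as np

def epoch_metrics(costs, iters):
    """costs: summed cross-entropy in nats; iters: number of predicted steps."""
    avg_nll = costs / iters
    return np.exp(avg_nll), avg_nll / np.log(2)  # (perplexity, bits-per-character)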
Example #6
def run_epoch(model, data, is_train=False, lr=1.0):
    """Runs the model on the given data."""
    if is_train:
        model.train()
    else:
        model.eval()
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    hidden = model.init_hidden()
    costs = 0.0
    iters = 0
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        inputs = Variable(
            torch.from_numpy(x.astype(np.int64)).transpose(
                0, 1).contiguous()).cuda()
        model.zero_grad()
        hidden = repackage_hidden(hidden)

        num_steps_time, bs = inputs.size()

        indices = np.random.permutation(bs)

        targets = Variable(
            torch.from_numpy(y.astype(np.int64)).transpose(
                0, 1).contiguous()).cuda()

        if is_train:
            #alpha = 0.1
            lam = np.random.beta(args.mixup_alpha, args.mixup_alpha)
            #lam = np.random.uniform(0.95, 1.0)
            lam = Variable(
                torch.from_numpy(np.array([lam]).astype('float32')).cuda())

            targets = targets.permute(1, 0)
            target_shuffled = targets[indices]
            targets = targets.permute(1, 0).contiguous()
            target_shuffled = target_shuffled.permute(1, 0).contiguous()

            tt_shuffled = torch.squeeze(
                target_shuffled.view(-1, model.batch_size * model.num_steps))

        targets = Variable(
            torch.from_numpy(y.astype(np.int64)).transpose(
                0, 1).contiguous()).cuda()

        tt = torch.squeeze(targets.view(-1,
                                        model.batch_size * model.num_steps))

        if is_train:
            outputs, hidden = model(inputs, hidden, is_train, indices, lam)
            loss = lam * criterion(outputs.view(
                -1, model.vocab_size), tt) + (1 - lam) * criterion(
                    outputs.view(-1, model.vocab_size), tt_shuffled)
        else:
            outputs, hidden = model(inputs, hidden, False, None, None)
            loss = criterion(outputs.view(-1, model.vocab_size), tt)

        costs += loss.data[0] * model.num_steps
        iters += model.num_steps

        if is_train:
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
            for p in model.parameters():
                p.data.add_(-lr, p.grad.data)
            if step % (epoch_size // 10) == 10:
                print("{} perplexity: {:8.2f} speed: {} wps".format(
                    step * 1.0 / epoch_size, np.exp(costs / iters),
                    iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
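The training branch follows the standard mixup recipe: sample lambda from Beta(alpha, alpha), shuffle the batch along the batch dimension, and interpolate the two cross-entropy terms (how the model itself mixes the shuffled states is hidden inside model(inputs, hidden, is_train, indices, lam) and not shown here). A minimal standalone sketch of that interpolated loss, detached from the model specifics above:

import numpy as np
import torch
import torch.nn.functional as F

def mixup_lm_loss(logits, targets, targets_shuffled, alpha):
    """Interpolated cross-entropy: logits (N, vocab_size), targets and targets_shuffled (N,)."""
    lam = float(np.random.beta(alpha, alpha))
    return (lam * F.cross_entropy(logits, targets)
            + (1.0 - lam) * F.cross_entropy(logits, targets_shuffled))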