Example no. 1
def forward(self, queries, keys, values):
    # Shape of the output `queries` and `attention_weights`:
    # (no. of queries, no. of key-value pairs)
    queries = d2l.reshape(
        queries.repeat_interleave(keys.shape[1]), (-1, keys.shape[1]))
    self.attention_weights = nn.functional.softmax(
        -((queries - keys) * self.w)**2 / 2, dim=1)
    # Shape of `values`: (no. of queries, no. of key-value pairs)
    return torch.bmm(self.attention_weights.unsqueeze(1),
                     values.unsqueeze(-1)).reshape(-1)
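This forward appears to come from a parametric Nadaraya-Watson kernel regression module in the style of the d2l book; below is a minimal sketch of the class it would sit in, assuming a single learnable kernel width `self.w` (the class name and constructor are assumptions based on the references to `self.w` and `self.attention_weights` above).

import torch
from torch import nn
from d2l import torch as d2l

class NWKernelRegression(nn.Module):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Learnable width of the Gaussian kernel used in `forward` above
        self.w = nn.Parameter(torch.rand((1,), requires_grad=True))

Training would then fit `self.w` by minimizing, e.g., a squared loss between the output of `forward` and the held-out values.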
Example no. 2
def batchify(data):
    max_len = max(len(c) + len(n) for _, c, n in data)
    centers, contexts_negatives, masks, labels = [], [], [], []
    for center, context, negative in data:
        cur_len = len(context) + len(negative)
        centers += [center]
        contexts_negatives += [context + negative + [0] * (max_len - cur_len)]
        masks += [[1] * cur_len + [0] * (max_len - cur_len)]
        labels += [[1] * len(context) + [0] * (max_len - len(context))]
    return (d2l.reshape(torch.tensor(centers),
                        (-1, 1)), torch.tensor(contexts_negatives),
            torch.tensor(masks), torch.tensor(labels))
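A small sanity check of batchify on two made-up (center, contexts, negatives) triples; the values below are purely illustrative.

x_1 = (1, [2, 2], [3, 3, 3, 3])
x_2 = (1, [2, 2, 2], [3, 3])
batch = batchify((x_1, x_2))
for name, data in zip(['centers', 'contexts_negatives', 'masks', 'labels'], batch):
    print(name, '=', data)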
Example no. 3
def predict_ch8(prefix, num_preds, model, vocab, device):  #@save
    """Generate new characters following the `prefix`."""
    state = model.begin_state(batch_size=1, device=device)
    outputs = [vocab[prefix[0]]]
    get_input = lambda: d2l.reshape(torch.tensor(
        [outputs[-1]], device=device), (1, 1))
    for y in prefix[1:]:  # Warm-up period
        _, state = model(get_input(), state)
        outputs.append(vocab[y])
    for _ in range(num_preds):  # Predict `num_preds` steps
        y, state = model(get_input(), state)
        outputs.append(int(y.argmax(dim=1).reshape(1)))
    return ''.join([vocab.idx_to_token[i] for i in outputs])
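A hedged usage example: `net` and `vocab` are assumed to come from d2l.load_data_time_machine and a model such as RNNModelScratch (see the later examples), so this call is a sketch rather than part of the original snippet.

# Warm up on the prefix, then generate 10 more characters.
print(predict_ch8('time traveller ', 10, net, vocab, d2l.try_gpu()))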
Example no. 4
def predict(self, prefix, num_preds, device):  #@save
    """Generate new characters following the `prefix`."""
    τ = prefix.shape[-1]
    state = self.begin_state(batch_size=1, device=device)
    outputs = torch.zeros(prefix.shape[0], τ + num_preds, device=device)
    prefix = prefix.reshape(-1, 1, τ)
    outputs[:, 0:τ] = prefix[:, 0, :]
    get_input = lambda i: d2l.reshape(outputs[:, i:i + τ], (-1, 1, τ))
    # for y in prefix[1:]:  # Warm-up period
    # _, state = net(get_input(), state)
    for i in range(num_preds):  # Predict `num_preds` steps
        y, state = self.forward(get_input(i), state)
        outputs[:, i + τ] = y.reshape(-1)
    return outputs
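A possible call for this numeric variant, assuming the model was trained on sliding windows of width τ taken from a 1-D series; the names `x`, `tau`, and `model` are assumptions for illustration.

# Use the first `tau` observations as the prefix and roll the model forward
# 64 steps; `preds` has shape (1, tau + 64).
prefix = x[:tau].reshape(1, tau)
preds = model.predict(prefix, num_preds=64, device=d2l.try_gpu())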
Example no. 5
def predict_ch8(prefix, num_preds, model, vocab, device):  #@save
    """Generate new characters following the `prefix`."""
    # e.g. for prefix 'time traveller': prefix[0] == 't', prefix[1] == 'i'
    state = model.begin_state(batch_size=1, device=device)
    outputs = [vocab[prefix[0]]]  # vocabulary index of the first prefix character (e.g. 163)
    get_input = lambda: d2l.reshape(
        torch.tensor([outputs[-1]], device=device),
        (1, 1))  # reshape the latest token to (batch_size=1, num_steps=1)
    for y in prefix[1:]:  # Warm-up period: only update the state, no learning
        _, state = model(get_input(), state)
        outputs.append(vocab[y])
    for _ in range(num_preds):  # Predict `num_preds` steps
        y, state = model(get_input(), state)
        outputs.append(int(y.argmax(dim=1).reshape(1)))
    return ''.join([vocab.idx_to_token[i] for i in outputs])
Example no. 6
def predict_ch8(prefix, num_preds, net, vocab, device):  #@save
    """Generate new characters following the `prefix`."""
    state = net.begin_state(batch_size=1, device=device)
    outputs = [vocab[prefix[0]]]
    get_input = lambda: d2l.reshape(d2l.tensor([outputs[-1]], device=device),
                                    (1, 1))
    for y in prefix[1:]:  # Warm-up period
        _, state = net(get_input(), state)
        outputs.append(vocab[y])
    for _ in range(num_preds):  # Predict `num_preds` steps
        y, state = net(get_input(), state)
        # sample from multinomial instead of argmax
        # outputs.append(int(torch.multinomial(F.softmax(y, dim=1), num_samples=1).reshape(1)))
        # biased α = 2, must be integer
        # α = 3
        # outputs.append(int(torch.multinomial(F.softmax(y**α, dim=1), num_samples=1).reshape(1)))
        outputs.append(int(y.argmax(dim=1).reshape(1)))
    return ''.join([vocab.idx_to_token[i] for i in outputs])
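The commented-out lines above hint at sampling instead of greedy decoding; here is a minimal sketch of plain multinomial sampling (temperature 1.0) that could replace the argmax line inside the prediction loop, assuming `y` holds the logits returned by `net`.

probs = torch.nn.functional.softmax(y, dim=1)  # distribution over the vocabulary
next_idx = int(torch.multinomial(probs, num_samples=1).reshape(1))
outputs.append(next_idx)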
Example no. 7
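This snippet presupposes training and test data for the kernel-regression demo (`x_train`, `y_train`, `x_test`, `y_truth`, `n_train`, `n_test`). A plausible setup in the style of the d2l book follows; the target function and noise level are assumptions, not taken from the original snippet.

import torch
from torch import nn
from d2l import torch as d2l

n_train = 50                                      # no. of training examples
x_train, _ = torch.sort(torch.rand(n_train) * 5)  # sorted training inputs (keys)

def f(x):
    return 2 * torch.sin(x) + x**0.8              # assumed ground-truth function

y_train = f(x_train) + torch.normal(0.0, 0.5, (n_train,))  # noisy targets (values)
x_test = torch.arange(0, 5, 0.1)                  # testing inputs (queries)
y_truth = f(x_test)                               # noise-free ground truth
n_test = len(x_test)                              # no. of testing examples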
n_test


def plot_kernel_reg(y_hat):
    d2l.plot(x_test, [y_truth, y_hat], 'x', 'y', legend=['Truth', 'Pred'],
             xlim=[0, 5], ylim=[-1, 5])
    d2l.plt.plot(x_train, y_train, 'o', alpha=0.5);


y_hat = torch.repeat_interleave(y_train.mean(), n_test)
plot_kernel_reg(y_hat)


# Shape of `X_repeat`: (`n_test`, `n_train`), where each row contains the
# same testing inputs (i.e., same queries)
X_repeat = d2l.reshape(x_test.repeat_interleave(n_train), (-1, n_train))
# Note that `x_train` contains the keys. Shape of `attention_weights`:
# (`n_test`, `n_train`), where each row contains attention weights to be
# assigned among the values (`y_train`) given each query
attention_weights = nn.functional.softmax(-(X_repeat - x_train)**2 / 2, dim=1)
# Each element of `y_hat` is a weighted average of the values, where the
# weights are the attention weights
y_hat = d2l.matmul(attention_weights, y_train)
plot_kernel_reg(y_hat)


d2l.show_heatmaps(attention_weights.unsqueeze(0).unsqueeze(0),
                  xlabel='Sorted training inputs',
                  ylabel='Sorted testing inputs')

Example no. 8
from d2l import torch as d2l
import torch
import torch.nn as nn

T = 1000  # Generate a total of 1000 points
time = torch.arange(1, T + 1, dtype=torch.float32)
x = torch.sin(0.01 * time) + torch.normal(0, 0.2, (T,))
d2l.plot(time, [x], 'time', 'x', xlim=[1, 1000], figsize=(6, 3))

tau = 4
features = torch.zeros((T - tau, tau))
for i in range(tau):
    features[:, i] = x[i: T - tau + i]
labels = d2l.reshape(x[tau:], (-1, 1))

batch_size, n_train = 16, 600
# Only the first `n_train` examples are used for training
train_iter = d2l.load_array((features[:n_train], labels[:n_train]),
                            batch_size, is_train=True)

# Function for initializing the weights of the network
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)

# A simple MLP
def get_net():
    net = nn.Sequential(nn.Linear(4, 10),
                        nn.ReLU(),
                        nn.Linear(10, 1))
    net.apply(init_weights)
    return net
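A short training loop in the spirit of the book could follow; the optimizer, loss, learning rate, and number of epochs here are assumptions rather than part of the original snippet.

def train(net, train_iter, loss, epochs, lr):
    trainer = torch.optim.Adam(net.parameters(), lr)
    for epoch in range(epochs):
        for X, y in train_iter:
            trainer.zero_grad()
            l = loss(net(X), y)
            l.backward()
            trainer.step()
        print(f'epoch {epoch + 1}, '
              f'loss: {d2l.evaluate_loss(net, train_iter, loss):f}')

net = get_net()
train(net, train_iter, nn.MSELoss(), epochs=5, lr=0.01)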
Example no. 9
    print(predict('time traveller'))
    print(predict('traveller'))


# RUN SCRIPT
DEBUG = False

device = d2l.try_gpu()
num_epochs, lr = 500, 1
if DEBUG:
    device = 'cpu'
    num_epochs, lr = 100, 1

num_hiddens = 512
# batch_size, num_steps = 32, 35
batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

X = d2l.reshape(d2l.arange(10), (2, 5))
net = RNNModelScratch(len(vocab), num_hiddens, device, get_params,
                      init_rnn_state, rnn)
state = net.begin_state(X.shape[0], device)
# Y, new_state = net(X.to(d2l.try_gpu()), state)
# Y.shape, len(new_state), new_state[0].shape

train_ch8(net, train_iter, vocab, lr, num_epochs, device)
print(net.params[-1])

# Default Performance Benchmarks:
# perplexity 1.0, 135269.0 tokens/sec on cuda:0
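After training, new text can be generated from a prefix; assuming the predict_ch8 helper from the earlier examples is in scope, a call might look like this.

print(predict_ch8('time traveller ', 50, net, vocab, device))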
Example no. 10
import torch
from torch import nn
from d2l import torch as d2l
from RNNModel import Numeric

#@tab mxnet, pytorch
T = 1000  # Generate a total of 1000 points
time = d2l.arange(1, T + 1, dtype=d2l.float32)
x = d2l.sin(0.01 * time) + d2l.normal(0, 0.2, (T, ))
d2l.plot(time, [x], 'time', 'x', xlim=[1, 1000], figsize=(6, 3))

#@tab mxnet, pytorch
tau = 30
features = d2l.zeros((T - tau, tau))
for i in range(tau):
    features[:, i] = x[i:T - tau + i]
labels = d2l.reshape(x[tau:], (-1, 1))

batch_size = 16
n_train = 600
n_train -= n_train % batch_size
# Only the first `n_train` examples are used for training
train_iter = d2l.load_array((features[:n_train], labels[:n_train]),
                            batch_size,
                            is_train=True)


# Function for initializing the weights of the network
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)
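The snippet stops after init_weights; a minimal MLP matching the tau = 30 feature window above could look like the following (the hidden width of 10 is an assumption).

def get_net():
    # Map a window of `tau` past values to a prediction of the next value.
    net = nn.Sequential(nn.Linear(tau, 10), nn.ReLU(), nn.Linear(10, 1))
    net.apply(init_weights)
    return net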
Example no. 11
class RNNModelScratch:
    """An RNN model implemented from scratch."""
    def __init__(self, vocab_size, num_hiddens, device,
                 get_params, init_state, forward_fn):
        self.vocab_size, self.num_hiddens = vocab_size, num_hiddens
        self.params = get_params(vocab_size, num_hiddens, device)
        self.init_state, self.forward_fn = init_state, forward_fn

    def __call__(self, X, state):
        X = F.one_hot(X.T, self.vocab_size).type(torch.float32)
        return self.forward_fn(X, state, self.params)

    def begin_state(self, batch_size, device):
        return self.init_state(batch_size, self.num_hiddens, device)


#%%
X = d2l.reshape(torch.arange(10), (2, 5))
num_hiddens = 512
model = RNNModelScratch(len(vocab), num_hiddens, d2l.try_gpu(), get_params,
                       init_rnn_state, rnn)
state = model.begin_state(X.shape[0], d2l.try_gpu())
Y, new_state = model(X.to(d2l.try_gpu()), state)

Y.shape, len(new_state), new_state[0].shape


# %%
def predict_ch8(prefix, num_preds, model, vocab, device):
    """generate new characters following the prefix"""
    state = model.begin_state(batch_size=1, device=device)
    outputs = [vocab[prefix[0]]]
    get_input = lambda:d2l.reshape(torch.tensor(\
Example no. 12
output.shape, len(state), state[0].shape, len(state[1]), state[1][0].shape

embed_size, num_hiddens, num_layers, dropout = 32, 32, 2, 0.1
batch_size, num_steps = 64, 10
lr, num_epochs, device = 0.005, 250, d2l.try_gpu()

train_iter, src_vocab, tgt_vocab = d2l.load_data_nmt(batch_size, num_steps)
encoder = d2l.Seq2SeqEncoder(len(src_vocab), embed_size, num_hiddens,
                             num_layers, dropout)
decoder = Seq2SeqAttentionDecoder(len(tgt_vocab), embed_size, num_hiddens,
                                  num_layers, dropout)
net = d2l.EncoderDecoder(encoder, decoder)
d2l.train_seq2seq(net, train_iter, lr, num_epochs, tgt_vocab, device)

engs = ['go .', "i lost .", 'he\'s calm .', 'i\'m home .']
fras = ['va !', 'j\'ai perdu .', 'il est calme .', 'je suis chez moi .']
for eng, fra in zip(engs, fras):
    translation, dec_attention_weight_seq = d2l.predict_seq2seq(
        net, eng, src_vocab, tgt_vocab, num_steps, device, True)
    print(f'{eng} => {translation}, ',
          f'bleu {d2l.bleu(translation, fra, k=2):.3f}')

attention_weights = d2l.reshape(
    d2l.concat([step[0][0][0] for step in dec_attention_weight_seq], 0),
    (1, 1, -1, num_steps))

# Plus one to include the end-of-sequence token
d2l.show_heatmaps(attention_weights[:, :, :, :len(engs[-1].split()) + 1].cpu(),
                  xlabel='Key positions',
                  ylabel='Query positions')
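As a quick sanity check when reading the heatmap, the assembled tensor should contain one attention distribution per decoding step of the last translated sentence, laid out as (batch = 1, heads = 1, decoding steps, num_steps).

print(attention_weights.shape)  # e.g. torch.Size([1, 1, <decoding steps>, 10])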