def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size, columnName):
    net = get_net(train_features.shape[1])
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    d2l.plot(np.arange(1, num_epochs + 1), [train_ls],
             xlabel='epoch',
             ylabel='log rmse',
             xlim=[1, num_epochs],
             yscale='log')
    d2l.plt.show()
    print(f'train log rmse {float(train_ls[-1]):f}')
    preds = net(test_features).detach().numpy()
    num = test_data['filename'].shape[0]
    # Use a font that can render CJK characters and display the minus sign correctly
    d2l.plt.rcParams['font.sans-serif'] = 'SimHei'
    d2l.plt.rcParams['axes.unicode_minus'] = False
    d2l.plot(list(range(1, num + 1)), [test_data[columnName], preds],
             xlabel='',
             ylabel='APS',
             xlim=[1, num],
             legend=['ground truth', 'prediction'])
    d2l.plt.show()
    submission = pd.concat(
        [test_data['filename'],
         pd.Series(preds.reshape(-1), name=columnName)], axis=1)
    submission.to_csv('submission.csv', index=False)
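
# The helpers `get_net` and `train` are not shown in this snippet; they are
# assumed to follow the d2l Kaggle-regression chapter, where `train` returns
# the per-epoch log-RMSE history. A minimal sketch of the former (assumes
# `from torch import nn`):
def get_net(in_features):
    # single linear layer mapping the features to one prediction
    return nn.Sequential(nn.Linear(in_features, 1))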
Example #2
def plot_kernel_reg(y_hat):
    d2l.plot(x_test, [y_truth, y_hat],
             'x',
             'y',
             legend=['Truth', 'Pred'],
             xlim=[0, 5],
             ylim=[-1, 5])
    d2l.plt.plot(x_train, y_train, 'o', alpha=0.5)
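
# `plot_kernel_reg` relies on module-level data not defined in this snippet.
# A sketch of how it is typically generated in the d2l Nadaraya-Watson
# kernel-regression section (assumed, for context):
import torch
n_train = 50
x_train, _ = torch.sort(torch.rand(n_train) * 5)            # random training inputs
def f(x):
    return 2 * torch.sin(x) + x**0.8                         # ground-truth function
y_train = f(x_train) + torch.normal(0.0, 0.5, (n_train,))    # noisy training targets
x_test = torch.arange(0, 5, 0.1)                             # evenly spaced test inputs
y_truth = f(x_test)
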
def test_PositionEncoding():
    encoding_dim, num_steps = 32, 60
    pos_encoding = PositionEncoding(encoding_dim, 0)
    pos_encoding.eval()
    X = pos_encoding(torch.zeros((1, num_steps, encoding_dim)))
    P = pos_encoding.P[:, :X.shape[1], :]
    d2l.plot(torch.arange(num_steps), P[0, :, 6:10].T, xlabel='Row (position)',
             figsize=(6, 3.5), legend=["Col %d" % d for d in torch.arange(6, 10)])
    d2l.plt.show()
Example #4
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    net = get_net()
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    d2l.plot(np.arange(1, num_epochs + 1), [train_ls], xlabel='epoch',
             ylabel='log rmse', xlim=[1, num_epochs], yscale='log')
    print(f'train log rmse {float(train_ls[-1]):f}')
    # Apply the network to the test set
    preds = net(test_features).detach().numpy()
    # Reformat it to export to Kaggle
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('submission.csv', index=False)
Example #5
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net()
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate,
                                   weight_decay, batch_size)
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        if i == 0:
            d2l.plot(list(range(1, num_epochs + 1)), [train_ls, valid_ls],
                     xlabel='epoch', ylabel='log rmse', xlim=[1, num_epochs],
                     legend=['train', 'valid'], yscale='log')
        print(f'fold {i + 1}, train log rmse {float(train_ls[-1]):f}, '
              f'valid log rmse {float(valid_ls[-1]):f}')
    return train_l_sum / k, valid_l_sum / k
Example #6
#%%
# Vanishing gradients
%matplotlib inline
import torch
from d2l import torch as d2l
from matplotlib import pyplot as plt
import numpy as np
x = torch.arange(-8.0, 8.0, 0.1, requires_grad=True)
y = torch.sigmoid(x)
y.backward(torch.ones_like(x))

d2l.plot(x.detach().numpy(), [y.detach().numpy(), x.grad.numpy()],
         legend=['sigmoid', 'gradient'], figsize=(4.5, 2.5))


plt.figure(figsize=(8, 4))
plt.plot(x.detach(), y.detach())
plt.plot(x.detach(), x.grad.detach())
plt.legend(['sigmoid', 'gradient'])
plt.show()


# %%
# Exploding gradients
M = torch.normal(0, 1, size=(4, 4))
print('a single matrix \n', M)
for i in range(100):
    M = torch.mm(M, torch.normal(0, 1, size=(4, 4)))
print('after multiplying 100 matrices\n', M)
# %%
import hashlib
Example #7
class PositionalEncoding(nn.Module):
    """Positional encoding."""
    def __init__(self, num_hiddens, dropout, max_len=1000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(dropout)
        # Create a long enough `P`
        self.P = torch.zeros((1, max_len, num_hiddens))
        X = torch.arange(0, max_len, dtype=torch.float32).reshape(
            -1, 1) / torch.pow(10000, torch.arange(
                0, num_hiddens, 2, dtype=torch.float32) / num_hiddens)
        self.P[:, :, 0::2] = torch.sin(X)
        self.P[:, :, 1::2] = torch.cos(X)
    
    def forward(self, X):
        X = X + self.P[:, :X.shape[1],:].to(X.device)
        return self.dropout(X)

pe = PositionalEncoding(20, 0)
pe.eval()
Y = pe(torch.zeros((1, 100, 20)))
d2l.plot(torch.arange(100), Y[0, :, 4:8].T, figsize=(6, 2.5),
         legend=["dim %d" % p for p in [4, 5, 6, 7]])
# %%
class EncoderBlock(nn.Module):
    def __init__(self, key_size, query_size, value_size, num_hiddens,
                 norm_shape, ffn_num_input, ffn_num_hiddens, num_heads,
                 dropout, use_bias=False, **kwargs):
        super(EncoderBlock, self).__init__(**kwargs)
        self.attention = MultiHeadAttention(key_size, query_size, value_size,
                                            num_hiddens, num_heads, dropout,
                                            use_bias)
        self.addnorm1 = AddNorm(norm_shape, dropout)
        self.ffn = PositionWiseFFN(
            ffn_num_input, ffn_num_hiddens, num_hiddens)
        self.addnorm2 = AddNorm(norm_shape, dropout)
    
    def forward(self, X, valid_len):
        # self-attention, add & norm, position-wise FFN, add & norm
        Y = self.addnorm1(X, self.attention(X, X, X, valid_len))
        return self.addnorm2(Y, self.ffn(Y))
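
# Assumed quick shape check (requires d2l's MultiHeadAttention, AddNorm and
# PositionWiseFFN, which this snippet does not show):
X = torch.ones((2, 100, 24))
valid_lens = torch.tensor([3, 2])
encoder_blk = EncoderBlock(24, 24, 24, 24, [100, 24], 24, 48, 8, 0.5)
encoder_blk.eval()
print(encoder_blk(X, valid_lens).shape)  # expected: torch.Size([2, 100, 24])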
Example #8
from d2l import torch as d2l
import torch
import torch.nn as nn

T = 1000  # Generate a total of 1000 points
time = torch.arange(1, T + 1, dtype=torch.float32)
x = torch.sin(0.01 * time) + torch.normal(0, 0.2, (T,))
d2l.plot(time, [x], 'time', 'x', xlim=[1, 1000], figsize=(6, 3))

tau = 4
features = torch.zeros((T - tau, tau))
for i in range(tau):
    features[:, i] = x[i: T - tau + i]
labels = d2l.reshape(x[tau:], (-1, 1))

batch_size, n_train = 16, 600
# Only the first `n_train` examples are used for training
train_iter = d2l.load_array((features[:n_train], labels[:n_train]),
                            batch_size, is_train=True)

# Function for initializing the weights of the network
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)

# A simple MLP
def get_net():
    net = nn.Sequential(nn.Linear(4, 10),
                        nn.ReLU(),
                        nn.Linear(10, 1))
    net.apply(init_weights)
    return net
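
# Assumed usage (the matching training loop appears in Example #17 below):
# loss = nn.MSELoss()
# net = get_net()
# train(net, train_iter, loss, 5, 0.01)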
Example #9
vocab = d2l.Vocab(corpus)
freqs = [freq for _, freq in vocab.token_freqs]

bigram_tokens = [pair for pair in zip(corpus[:-1], corpus[1:])]
bigram_vocab = d2l.Vocab(bigram_tokens)
print(bigram_vocab.token_freqs[:10])
bifreqs = [freq for _, freq in bigram_vocab.token_freqs]

trigram_tokens = [tup for tup in zip(corpus[:-2], corpus[1:-1], corpus[2:])]
trigram_vocab = d2l.Vocab(trigram_tokens)
print(trigram_vocab.token_freqs[:10])
trifreqs = [freq for _, freq in trigram_vocab.token_freqs]

d2l.plot([freqs, bifreqs, trifreqs],
         xlabel="token: x",
         ylabel="frequency: n(x)",
         xscale="log",
         yscale="log",
         legend=["unigram", "bigram", "trigram"])
d2l.plt.show()


def seq_data_iter_random(corpus, batch_size, num_steps):
    corpus = corpus[random.randint(0, num_steps - 1):]
    num_subseqs = (len(corpus) - 1) // num_steps
    initial_indices = list(range(0, num_subseqs * num_steps, num_steps))
    random.shuffle(initial_indices)

    def data(pos):
        return corpus[pos:pos + num_steps]

    num_batches = num_subseqs // batch_size
    for i in range(0, batch_size * num_batches, batch_size):
        initial_indices_per_batch = initial_indices[i: i + batch_size]
        X = [data(j) for j in initial_indices_per_batch]
        Y = [data(j + 1) for j in initial_indices_per_batch]
        yield torch.tensor(X), torch.tensor(Y)
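
# Assumed quick check on a toy sequence (assumes `import torch` and
# `import random` from the original source):
my_seq = list(range(35))
for X, Y in seq_data_iter_random(my_seq, batch_size=2, num_steps=5):
    print('X:', X, '\nY:', Y)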
Example #10
def normal(x, mu, sigma):
    p = 1 / math.sqrt(2 * math.pi * sigma**2)
    return p * np.exp(-0.5 / sigma**2 * (x - mu)**2)


# Use NumPy again for visualization
x = np.arange(-7, 7, 0.01)

# Mean and standard deviation pairs
params = [(0, 1), (0, 2), (3, 1)]
d2l.plot(x, [normal(x, mu, sigma) for mu, sigma in params], xlabel='x',
         ylabel='p(x)', figsize=(4.5, 2.5),
         legend=[f'mean {mu}, std {sigma}' for mu, sigma in params])


def synthetic_data(w, b, num_examples):  #@save
    """Generate y = Xw + b + noise."""
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    y += torch.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))

true_w = torch.tensor([2, -3.4])
true_b = 4.2
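
# Assumed next step, as in the d2l linear-regression section: draw 1000
# examples and visualize the second feature against the labels.
features, labels = synthetic_data(true_w, true_b, 1000)
d2l.set_figsize()
d2l.plt.scatter(features[:, 1].detach().numpy(), labels.detach().numpy(), 1)
d2l.plt.show()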
Example #11
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

net.apply(init_weights);

loss = nn.CrossEntropyLoss(reduction='none')

trainer = torch.optim.SGD(net.parameters(), lr=0.1)

num_epochs = 10
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)

# Multilayer perceptron (MLP)
x = torch.arange(-8.0, 8.0, 0.1, requires_grad=True)
y = torch.relu(x)
d2l.plot(x.detach(), y.detach(), 'x', 'relu(x)', figsize=(5, 2.5))

def relu(X):
    a = torch.zeros_like(X)
    return torch.max(X, a)

loss = nn.CrossEntropyLoss(reduction='none')

num_epochs, lr = 10, 0.1
updater = torch.optim.SGD(params, lr=lr)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)

net = nn.Sequential(nn.Flatten(),
                    nn.Linear(784, 256),
                    nn.ReLU(),
                    nn.Linear(256, 10))
Example #12
from d2l import torch as d2l
import matplotlib.pyplot as plt
import torch
from torch import nn
from RNNModel import Numeric

T = 1000  # Generate a total of 1000 points
time = d2l.arange(1, T + 1, dtype=d2l.float32)
x = d2l.sin(0.01 * time) + d2l.normal(0, 0.2, (T, ))
ax = plt.axes()
d2l.plot(time, [x], 'time', 'x', xlim=[1, 1000], figsize=(6, 3), axes=ax)

batch_size = 16
train_seq_len, pred_seq_len = 360, 360
n_train = 500
n_train -= n_train % batch_size

τ = train_seq_len
features = d2l.zeros((T - τ, τ))
labels = d2l.zeros((T - τ, τ))
for i in range(τ):
    features[:, i] = x[i:T - τ + i]
    labels[:, i] = x[i + 1:T - τ + i + 1]

# Only the first `n_train` examples are used for training
train_iter = d2l.load_array((features[:n_train], labels[:n_train]),
                            batch_size,
                            is_train=True)


def get_net_gru(hidden_size, input_size, output_size):
    # Assumed body (mirroring the variant in Example #18): a recurrent layer
    # wrapped by the custom `Numeric` head from RNNModel.
    rnn_layer = nn.GRU(input_size=input_size, hidden_size=hidden_size)
    return Numeric(rnn_layer, output_size=output_size)
Example #13
#%%
from d2l import torch as d2l
import torch
import random

tokens = d2l.tokenize(d2l.read_time_machine())
# Since each text line is not necessarily a sentence or a paragraph, we
# concatenate all text lines
corpus = [token for line in tokens for token in line]
vocab = d2l.Vocab(corpus)
vocab.token_freqs[:10]
# %%
freqs = [freq for token, freq in vocab.token_freqs]
d2l.plot(freqs, xlabel='token: x', ylabel='frequency: n(x)',
         xscale='log', yscale='log')
# %%
bigram_tokens = [pair for pair in zip(corpus[:-1],corpus[1:])]
bigram_vocab = d2l.Vocab(bigram_tokens)
bigram_vocab.token_freqs[:10]

#%%
trigram_tokens = [triple for triple in zip(
    corpus[:-2], corpus[1:-1], corpus[2:])]
trigram_vocab = d2l.Vocab(trigram_tokens)
trigram_vocab.token_freqs[:10]
# %%
bigram_freqs = [freq for token, freq in bigram_vocab.token_freqs]
trigram_freqs = [freq for token, freq in trigram_vocab.token_freqs]
d2l.plot([freqs, bigram_freqs, trigram_freqs], xlabel='token: x',
         ylabel='frequency: n(x)', xscale='log', yscale='log',
         legend=['unigram', 'bigram', 'trigram'])
Example #14
# used an rnn in an n-gram style :(
# THIS IS PROBABLY NOT OPTIMAL

from d2l import torch as d2l
import matplotlib.pyplot as plt
import torch
from torch import nn
from RNNModel import Numeric

#@tab mxnet, pytorch
T = 1000  # Generate a total of 1000 points
time = d2l.arange(1, T + 1, dtype=d2l.float32)
x = d2l.sin(0.01 * time) + d2l.normal(0, 0.2, (T, ))
d2l.plot(time, [x], 'time', 'x', xlim=[1, 1000], figsize=(6, 3))

#@tab mxnet, pytorch
tau = 30
features = d2l.zeros((T - tau, tau))
for i in range(tau):
    features[:, i] = x[i:T - tau + i]
labels = d2l.reshape(x[tau:], (-1, 1))

batch_size = 16
n_train = 600
n_train -= n_train % batch_size
# Only the first `n_train` examples are used for training
train_iter = d2l.load_array((features[:n_train], labels[:n_train]),
                            batch_size,
                            is_train=True)

Example #15
#%%
%matplotlib inline
import torch
from torch import nn
from d2l import torch as d2l


T = 1000  # Generate a total of 1000 points
time = torch.arange(1, T + 1, dtype=torch.float32)
x = torch.sin(0.01 * time) + torch.normal(0, 0.2, (T,))
d2l.plot(time, [x], 'time', 'x', xlim=[1, 1000], figsize=(20, 8))
# %%
tau = 4
features = torch.zeros((T - tau, tau))
for i in range(tau):
    features[:, i] = x[i:T - tau + i]
labels = x[tau:].reshape((-1, 1))

batch_size, n_train = 16, 600
# Only the first `n_train` examples are used for training
train_iter = d2l.load_array((features[:n_train], labels[:n_train]),
                            batch_size, is_train=True)
# %%

# Function for initializing the weights of the network
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)

# A simple MLP
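# (definition assumed below; it mirrors the MLP in Example #8 above)
def get_net():
    net = nn.Sequential(nn.Linear(4, 10),
                        nn.ReLU(),
                        nn.Linear(10, 1))
    net.apply(init_weights)
    return net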
Example #16
#%%
from d2l import torch as d2l
import torch
import torch.nn as nn

#%%
T = 1000
time = torch.arange(1, T + 1, dtype=torch.float32)
x = torch.sin(0.01 * time) + torch.normal(0, 0.2, (T, ))
d2l.plot(time, [x], 'time', 'x', xlim=[1, 1000], figsize=(6, 3))
# %%
Example #17
def train(net, train_iter, loss, epochs, lr):
    trainer = torch.optim.Adam(net.parameters(), lr)
    for epoch in range(epochs):
        for X, y in train_iter:
            trainer.zero_grad()
            l = loss(net(X), y)
            l.backward()
            trainer.step()
        print(
            f"epoch: {epoch + 1}, loss: {d2l.evaluate_loss(net, train_iter, loss):f}"
        )


net = get_net()
train(net, train_iter, loss, 5, 0.01)
onestep_preds = net(features)
multistep_preds = torch.zeros(T)
multistep_preds[:n_train + tau] = x[:n_train + tau]
for i in range(n_train + tau, T):
    multistep_preds[i] = net(multistep_preds[i - tau:i].reshape((1, -1)))
d2l.plot([time, time[tau:], time[n_train + tau:]], [
    x.detach().numpy(),
    onestep_preds.detach().numpy(),
    multistep_preds[n_train + tau:].detach().numpy()
],
         "time",
         "x", ["data", "1-step preds", "multi-step preds"], [1, 1000],
         figsize=(6, 3))
d2l.plt.show()
Example #18
batch_size = 16
n_train = 600
n_train -= n_train%batch_size
# Only the first `n_train` examples are used for training
train_iter = d2l.load_array((features[:n_train], labels[:n_train]),
                            batch_size, is_train=True)

def get_net_gru(num_hiddens=256):
    # input_size := "feature dimensions"
    rnn_layer = nn.RNN(input_size=1, hidden_size=num_hiddens)
    net = Numeric(rnn_layer, output_size=1)
    return net

net = get_net_gru(256)
# device = d2l.try_gpu()
device='cpu'
net.train(net, train_iter, lr=1, num_epochs=10, device=device)

num_preds=64
preds = net.predict(features[:n_train], num_preds=num_preds, device='cpu')

domain = n_train + num_preds
d2l.plot([time[:domain], time[n_train:domain]],
         [x[:domain], preds.detach().numpy()],
         legend=['orig-seq', 'predictions'], xlim=[0, domain],
         figsize=(6, 3))

plt.show()