Example #1
from mxnet import np, npx
from mxnet.gluon import nn

npx.set_np()


class PositionalEncoding(nn.Block):
    def __init__(self, num_hiddens, dropout, max_len=1000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(dropout)
        # Create a long enough table `P` of sinusoidal encodings
        self.P = np.zeros((1, max_len, num_hiddens))
        X = np.arange(0, max_len).reshape(-1, 1) / np.power(
            10000, np.arange(0, num_hiddens, 2) / num_hiddens)
        self.P[:, :, 0::2] = np.sin(X)  # even channels: sine
        self.P[:, :, 1::2] = np.cos(X)  # odd channels: cosine
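
A quick shape check of the table built above (hypothetical sizes; the fragment omits the class's forward pass, so we inspect `P` directly):

pe = PositionalEncoding(num_hiddens=8, dropout=0.1)
print(pe.P.shape)      # (1, 1000, 8)
print(pe.P[0, 1, :4])  # alternating sin/cos values at position 1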
Example #2
import mxnet as mx
from mxnet import np
from mxnet.test_utils import assert_almost_equal

INT_OVERFLOW = 2**31  # just past the int32 limit, as in MXNet's large-tensor tests


def test_power():
    A = np.full((2, INT_OVERFLOW), 2)
    B = np.ones((2, INT_OVERFLOW))
    B[-1, -1] = 3
    A.attach_grad()
    B.attach_grad()
    with mx.autograd.record():
        C = np.power(A, B)
        C.backward()
    assert C.shape == A.shape
    assert C[-1, -1] == 8  # 2**3
    assert A.grad.shape == A.shape
    assert A.grad[-1, -1] == 12  # dC/dA = B * A**(B-1) = 3 * 2**2
    assert B.grad.shape == B.shape
    # dC/dB = A**B * ln(A)
    assert_almost_equal(B.grad[-1, -1], 2**3 * np.log(2), rtol=1e-5, atol=1e-5)
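
For intuition, the same gradient identities can be verified at small scale without allocating an INT_OVERFLOW-sized array; a minimal sketch with plain MXNet numpy:

import mxnet as mx
from mxnet import np

a = np.array([2.0])
b = np.array([3.0])
a.attach_grad()
b.attach_grad()
with mx.autograd.record():
    c = np.power(a, b)
c.backward()
print(c, a.grad, b.grad)  # [8.], [12.], [5.545...] == 8 * ln(2)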
Example #3
import numpy as np

from sockeye import utils  # Sockeye's utility module


def get_positional_embeddings(length, depth) -> np.ndarray:
    utils.check_condition(
        depth % 2 == 0,
        "Positional embeddings require an even embedding size; got %d."
        % depth)
    # (1, depth // 2)
    channels = np.arange(depth // 2).reshape((1, -1))

    # (length, 1)
    positions = np.arange(0, length).reshape((-1, 1))
    scaled_positions = positions / np.power(10000, (2 * channels) / depth)
    # sinusoids:
    sin = np.sin(scaled_positions)
    # cosines:
    cos = np.cos(scaled_positions)
    # concatenate (not interleave): (length, depth)
    encodings = np.hstack([sin, cos])
    return encodings
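
A minimal usage sketch (illustrative sizes); note that the sine and cosine halves sit side by side rather than being interleaved:

emb = get_positional_embeddings(length=4, depth=6)
print(emb.shape)  # (4, 6): columns 0-2 are sine, columns 3-5 cosine
print(emb[0])     # position 0: [0. 0. 0. 1. 1. 1.]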
Example #4
def forward(self, data):
    # Normalize by the root-mean-square over the last axis
    var = np.power(data, 2).mean(-1, keepdims=True)
    data = data * np.reciprocal(np.sqrt(var + self._epsilon))
    # Apply the learned elementwise scale and shift
    return data * self.gamma.data() + self.beta.data()
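
For context, a minimal, hypothetical wrapper showing where `gamma`, `beta`, and `_epsilon` could come from; the class name and parameter setup are assumptions, and only the forward pass matches the fragment above:

from mxnet import np, npx
from mxnet.gluon import nn

npx.set_np()


class RMSNormLike(nn.Block):  # hypothetical name
    def __init__(self, units, epsilon=1e-6):
        super().__init__()
        self._epsilon = epsilon
        self.gamma = self.params.get('gamma', shape=(units,), init='ones')
        self.beta = self.params.get('beta', shape=(units,), init='zeros')

    def forward(self, data):
        var = np.power(data, 2).mean(-1, keepdims=True)
        data = data * np.reciprocal(np.sqrt(var + self._epsilon))
        return data * self.gamma.data() + self.beta.data()


layer = RMSNormLike(units=8)
layer.initialize()
print(layer(np.ones((2, 8))).shape)  # (2, 8)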
Example #5
def forward(self, x):
    # Compute the variance in float32 for numerical stability
    var = np.power(x.astype('float32'), 2).mean(-1, keepdims=True)
    x = x * np.reciprocal(np.sqrt(var + self.variance_epsilon))
    # Cast back when the scale parameter is half precision
    if self.gemma.dtype == 'float16':
        x = x.astype('float16')
    return self.gemma * x
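
A hedged illustration of the mixed-precision pattern above (made-up shapes): square in float32 to avoid float16 overflow, then cast the result back.

from mxnet import np, npx

npx.set_np()

x = np.ones((2, 4), dtype='float16')
x32 = x.astype('float32')  # upcast before squaring
var = np.power(x32, 2).mean(-1, keepdims=True)
y = x32 * np.reciprocal(np.sqrt(var + 1e-6))
print(y.dtype, y.astype('float16').dtype)  # float32 float16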
Example #6
from mxnet import np, npx

npx.set_np()

maxdegree = 20
n_train, n_test = 100, 100
true_w = np.zeros(maxdegree)
true_w[0:4] = np.array([5.0, 1.2, -3.4, 5.6])

features = np.random.normal(size=(n_train + n_test, 1))
np.random.shuffle(features)  # shuffles in place and returns None
power = np.arange(maxdegree).reshape(1, -1)
poly_features = np.power(features, power)
# Normalize term i by i!: gamma(n) = (n-1)!
poly_features = poly_features / npx.gamma(np.arange(maxdegree) + 1).reshape(
    1, -1)
labels = np.dot(poly_features, true_w)
labels += np.random.normal(scale=0.1, size=labels.shape)

print(poly_features.shape, true_w.shape, labels.shape)
print("----------------------")
print(features[0])
print("----------------------")
print(poly_features[0], true_w)
print("----------------------")
print(labels[0])
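
A quick sanity check (illustrative values) that the `npx.gamma` normalization above really implements the factorial, i! = gamma(i + 1):

import math

from mxnet import np, npx

npx.set_np()

n = np.arange(5)
print(npx.gamma(n + 1))                       # [ 1.  1.  2.  6. 24.]
print([math.factorial(i) for i in range(5)])  # [1, 1, 2, 6, 24]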
Example #7
import math
from mxnet import np, npx, gluon
from mxnet.gluon import nn
from d2l import mxnet as d2l

npx.set_np()

max_degree = 20  # Maximum degree of the polynomial
n_train, n_test = 100, 100  # Training and test dataset sizes
true_w = np.zeros(max_degree)  # Allocate lots of empty space
true_w[0:4] = np.array([5, 1.2, -3.4, 5.6])

features = np.random.normal(size=(n_train + n_test, 1))
np.random.shuffle(features)
poly_features = np.power(features, np.arange(max_degree).reshape(1, -1))
for i in range(max_degree):
    poly_features[:, i] /= math.gamma(i + 1)  # `gamma(n)` = (n-1)!
# Shape of `labels`: (`n_train` + `n_test`,)
labels = np.dot(poly_features, true_w)
labels += np.random.normal(scale=0.1, size=labels.shape)


def evaluate_loss(net, data_iter, loss):  #@save
    """Evaluate the loss of a model on the given dataset."""
    metric = d2l.Accumulator(2)  # Sum of losses, no. of examples
    for X, y in data_iter:
        l = loss(net(X), y)
        metric.add(l.sum(), l.size)
    return metric[0] / metric[1]
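
A hedged usage sketch: fit a linear model on the first four polynomial features and score it with `evaluate_loss`; the hyperparameters are illustrative and `autograd` is assumed to be imported alongside the modules above.

from mxnet import autograd

net = nn.Sequential()
net.add(nn.Dense(1, use_bias=False))
net.initialize()
loss = gluon.loss.L2Loss()
train_iter = d2l.load_array(
    (poly_features[:n_train, :4], labels[:n_train].reshape(-1, 1)),
    batch_size=10)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01})
for epoch in range(10):
    for X, y in train_iter:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        trainer.step(X.shape[0])
print(evaluate_loss(net, train_iter, loss))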