def test_broadcast_sub2(self):
    t1 = Tensor([[1, 2, 3], [4, 5, 6]], requires_grad=True)
    t2 = Tensor([[7, 8, 9]], requires_grad=True)

    t3 = t1 - t2  # shape (2, 3)
    assert t3.data.tolist() == [[-6, -6, -6], [-3, -3, -3]]

    t3.backward(Tensor([[1, 1, 1], [1, 1, 1]]))

    assert t1.grad.data.tolist() == [[1, 1, 1], [1, 1, 1]]
    assert t2.grad.data.tolist() == [[-2, -2, -2]]
def test_simple_mul(self):
    t1 = Tensor([[1, 2], [3, 4], [5, 6]], requires_grad=True)  # (3, 2)
    t2 = Tensor([[10], [20]], requires_grad=True)              # (2, 1)

    t3 = t1 @ t2                                               # (3, 1)
    assert t3.data.tolist() == [[50], [110], [170]]

    grad = Tensor([[-1], [-2], [-3]])
    t3.backward(grad)

    assert np.all(t1.grad.data == grad.data @ t2.data.T)
    assert np.all(t2.grad.data == t1.data.T @ grad.data)
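# A minimal sketch (plain NumPy, independent of the Tensor class above) of why the
# matmul gradients asserted in test_simple_mul hold: for L = sum(G * (A @ B)),
# dL/dA == G @ B.T and dL/dB == A.T @ G. Checked here by central finite differences.
import numpy as np

A = np.array([[1., 2.], [3., 4.], [5., 6.]])
B = np.array([[10.], [20.]])
G = np.array([[-1.], [-2.], [-3.]])  # the upstream gradient used in the test


def scalar_loss(A, B):
    return np.sum(G * (A @ B))


eps = 1e-6
numeric_dA = np.zeros_like(A)
for i in range(A.shape[0]):
    for j in range(A.shape[1]):
        A_plus, A_minus = A.copy(), A.copy()
        A_plus[i, j] += eps
        A_minus[i, j] -= eps
        numeric_dA[i, j] = (scalar_loss(A_plus, B) - scalar_loss(A_minus, B)) / (2 * eps)

assert np.allclose(numeric_dA, G @ B.T)  # matches the analytic rule used by the test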
def test_broadcast_add2(self):
    t1 = Tensor([[1, 2, 3], [4, 5, 6]], requires_grad=True)
    t2 = Tensor([[7, 8, 9]], requires_grad=True)

    t3 = t1 + t2  # shape (2, 3)
    assert t3.data.tolist() == [[8, 10, 12], [11, 13, 15]]

    t3.backward(Tensor([[1, 1, 1], [1, 1, 1]]))

    assert t1.grad.data.tolist() == [[1, 1, 1], [1, 1, 1]]
    assert t2.grad.data.tolist() == [[2, 2, 2]]
def test_simple_sub(self):
    t1 = Tensor([1, 2, 3], requires_grad=True)
    t2 = Tensor([4, 5, 6], requires_grad=True)

    t3 = t1 - t2
    assert t3.data.tolist() == [-3, -3, -3]

    t3.backward(Tensor([-1, -2, -3]))

    assert t1.grad.data.tolist() == [-1, -2, -3]
    assert t2.grad.data.tolist() == [1, 2, 3]

    # In-place update on a leaf tensor invalidates its gradient
    t1 -= 0.1
    assert t1.grad is None
    assert t1.data.tolist() == [0.9, 1.9, 2.9]
def test_simple_add(self):
    t1 = Tensor([1, 2, 3], requires_grad=True)
    t2 = Tensor([4, 5, 6], requires_grad=True)

    t3 = t1 + t2
    assert t3.data.tolist() == [5, 7, 9]

    t3.backward(Tensor([-1, -2, -3]))

    assert t1.grad.data.tolist() == [-1, -2, -3]
    assert t2.grad.data.tolist() == [-1, -2, -3]

    # In-place update on a leaf tensor invalidates its gradient
    t1 += 0.1
    assert t1.grad is None
    assert t1.data.tolist() == [1.1, 2.1, 3.1]
def __init__(self, num_channels: int, eps: float = 1e-05, momentum: float = 0.1,
             affine: bool = True, track_running_stats: bool = True) -> None:
    # Call super constructor
    super(BatchNorm1d, self).__init__()
    # Save parameters
    self.eps = eps
    self.momentum = momentum
    self.track_running_stats = track_running_stats
    # Init learnable parameters if the affine transform is used
    self.gamma = Parameter(data=np.ones(num_channels)) if affine else None
    self.beta = Parameter(data=np.zeros(num_channels)) if affine else None
    # Init running mean and std if running stats are tracked
    self.running_mean = Tensor(0.0) if self.track_running_stats else None
    self.running_std = Tensor(1.0) if self.track_running_stats else None
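# Hedged sketch (plain NumPy, not this repo's actual forward implementation) of how
# the attributes initialised above are typically consumed: in training mode the batch
# statistics normalise the input and update the running statistics via the momentum;
# in eval mode the running statistics are used instead. Names here are illustrative.
import numpy as np


def batch_norm_1d_forward(x, gamma, beta, running_mean, running_std,
                          eps=1e-05, momentum=0.1, training=True):
    if training:
        mean = x.mean(axis=0)
        std = x.std(axis=0)
        # exponential moving average of the batch statistics
        running_mean = (1 - momentum) * running_mean + momentum * mean
        running_std = (1 - momentum) * running_std + momentum * std
    else:
        mean, std = running_mean, running_std
    x_hat = (x - mean) / (std + eps)                     # normalise per channel
    out = gamma * x_hat + beta if gamma is not None else x_hat
    return out, running_mean, running_std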
def test_broadcast_sub(self):
    """
    Broadcasting example: if t1.shape == (10, 5) and t2.shape == (5,),
    then for t1 - t2 the (5,)-shaped t2 is viewed as (1, 5),
    e.g. t2 = [1, 2, 3, 4, 5] is viewed as [[1, 2, 3, 4, 5]],
    and then broadcast across the rows of t1.
    """
    t1 = Tensor([[1, 2, 3], [4, 5, 6]], requires_grad=True)
    t2 = Tensor([7, 8, 9], requires_grad=True)

    t3 = t1 - t2  # shape (2, 3)
    assert t3.data.tolist() == [[-6, -6, -6], [-3, -3, -3]]

    t3.backward(Tensor([[1, 1, 1], [1, 1, 1]]))

    assert t1.grad.data.tolist() == [[1, 1, 1], [1, 1, 1]]
    assert t2.grad.data.tolist() == [-2, -2, -2]
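# A minimal sketch (plain NumPy) of the rule the broadcast tests above rely on:
# the gradient flowing into an operand that was broadcast must be summed over the
# broadcast dimensions, negated here because t2 is the subtrahend.
import numpy as np

upstream = np.ones((2, 3))                      # the grad passed to t3.backward(...)
grad_t2 = (-upstream).sum(axis=0)               # (3,) operand: sum over the broadcast rows
assert grad_t2.tolist() == [-2.0, -2.0, -2.0]   # matches t2.grad in test_broadcast_sub

grad_t2_keepdims = (-upstream).sum(axis=0, keepdims=True)     # (1, 3) operand
assert grad_t2_keepdims.tolist() == [[-2.0, -2.0, -2.0]]      # matches test_broadcast_sub2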
""" The idea here is that we'd like to use our library to minimize a function say x**2 """ from autograd import Tensor x = Tensor([10, -10, 10, -5, 6, 3, 1], requires_grad=True) # we want to minimize the sum of squares for i in range(100): x.zero_grad() sum_of_squares = (x * x).sum() # is a 0-tensor sum_of_squares.backward() delta_x = 0.1 * x.grad x -= delta_x print(i, sum_of_squares)
from autograd import Tensor

x = Tensor([[1., 2., 3.]])
w = Tensor([[2.], [3.], [4.]], requires_grad=True)
b = Tensor([.0], requires_grad=True)

y_ = x.matmul(w).add(b)
y = Tensor([60.])

loss = y_.sub(y).pow(Tensor(2.)).div(Tensor(2.))
loss.backward()

print(loss.narray)
print(w.grad)
print(b.grad)
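# Plain-NumPy check of the gradients printed above (independent of the autograd
# library): y_ = x @ w + b = 20, loss = (y_ - 60)**2 / 2, so dL/dy_ = y_ - 60 = -40,
# dL/dw = x.T * dL/dy_ and dL/db = dL/dy_.
import numpy as np

x_np = np.array([[1., 2., 3.]])
w_np = np.array([[2.], [3.], [4.]])
b_np = np.array([0.])

y_pred = x_np @ w_np + b_np   # [[20.]]
dy = y_pred - 60.             # [[-40.]]

assert (x_np.T @ dy).tolist() == [[-40.0], [-80.0], [-120.0]]  # expected dL/dw
assert dy.item() == -40.0                                      # expected dL/db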
import numpy as np
from autograd import Tensor

t1 = Tensor(1.)
print(t1.narray.dtype)

xs = []
ys = []
for i in range(1, 10):
    xs.append(Tensor([[i]]))
    ys.append(Tensor([[i * 2]]))

w = Tensor([[.1]], requires_grad=True)
b = Tensor([.0], requires_grad=True)

for i in range(9):
    for j in range(9):
        x = xs[j]
        y = ys[j]
        print('x:')
        print(x.narray)
        print('y:')
        print(y.narray)

        y_ = x.matmul(w).add(b)
        print('y_:')
        print(y_.narray)

        loss = y_.sub(y).pow(Tensor(2.))
        print('loss:')
        print(loss.narray)

        loss.backward()
def test_simple_sum(self):
    t1 = Tensor([1, 2, 3], requires_grad=True)
    t2 = t1.sum()

    t2.backward()

    assert t1.grad.data.tolist() == [1, 1, 1]
def test_sum_with_grad(self):
    t1 = Tensor([1, 2, 3], requires_grad=True)
    t2 = t1.sum()

    t2.backward(Tensor(3))

    assert t1.grad.data.tolist() == [3, 3, 3]
        self.b1 = Parameter(num_hidden)
        self.w2 = Parameter(num_hidden, 4)
        self.b2 = Parameter(4)

    def predict(self, inputs: Tensor) -> Tensor:
        # inputs is (batch_size, 10)
        x1 = inputs @ self.w1 + self.b1  # (batch_size, num_hidden)
        x2 = tanh(x1)                    # (batch_size, num_hidden)
        x3 = x2 @ self.w2 + self.b2      # (batch_size, 4)
        return x3


if __name__ == '__main__':
    x_train = Tensor([binary_encode(x) for x in range(101, 1024)])     # (923, 10) tensor
    y_train = Tensor([fizz_buzz_encode(x) for x in range(101, 1024)])  # (923, 4) tensor

    optimizer = SGD(lr=1e-3)
    batch_size = 32
    model = FizzBuzzModel()

    starts = np.arange(0, x_train.shape[0], batch_size)
    for epoch in range(5000):
        epoch_loss = 0.0

        # Getting a different order of batches at each epoch
        np.random.shuffle(starts)

        for start in starts:
            end = start + batch_size
import numpy as np

from autograd import Tensor, Parameter, Module
from autograd.optim import SGD

x_data = Tensor(np.random.randn(100, 3))
coef = Tensor(np.array([-1, +3, -2], dtype=np.float64))  # (3,)

# @ is matrix multiplication: (100, 3) @ (3,) -> (100,)
y_data = x_data @ coef + 5


class LinearModel(Module):
    def __init__(self) -> None:
        self.w = Parameter(3)
        self.b = Parameter()

    def predict(self, inputs: Tensor) -> Tensor:
        return inputs @ self.w + self.b


# w = Tensor(np.random.randn(3), requires_grad=True)
# b = Tensor(np.random.randn(), requires_grad=True)
# w = Parameter(3)  # Tensor of shape (3,), requires_grad=True, random values
# b = Parameter()

optimizer = SGD(lr=0.001)  # learning_rate = 0.001
batch_size = 32
model = LinearModel()

for epoch in range(100):
    epoch_loss = 0.0
def sigmoid2(x):
    # sigmoid(x) = 1 / (1 + exp(-x))
    return Tensor(1).div(Tensor(1).add(x.ng().exp()))
from autograd import Tensor
import numpy as np


def sigmoid(x):
    # sigmoid(x) = exp(x) / (1 + exp(x))
    return x.exp().div(Tensor(1).add(x.exp()))


def sigmoid2(x):
    # equivalent form: sigmoid(x) = 1 / (1 + exp(-x))
    return Tensor(1).div(Tensor(1).add(x.ng().exp()))


x = Tensor([[1.1]], requires_grad=True)

# y = x.pow(Tensor(2))
# print(y.narray)
# y.backward()
# print(x.grad.narray)

# loss = y.sub(Tensor(10))
# print(loss.narray)
# loss.backward(loss)
# print(x.grad.narray)

# y = Tensor(1).div(Tensor(1).add(x.ng().exp()))
# print(y.narray)
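# Plain-NumPy sanity check of the closed form used above and of the gradient that
# x.grad should report after backpropagating through sigmoid at x = 1.1:
# sigmoid(x) = 1 / (1 + exp(-x)) and sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
import numpy as np

s = 1.0 / (1.0 + np.exp(-1.1))
print(s)            # ~0.7503
print(s * (1 - s))  # ~0.1874, the expected gradient at x = 1.1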
def fizz_buzz_encode(x: int) -> List[int]:
    if x % 15 == 0:
        return [0, 0, 0, 1]
    elif x % 5 == 0:
        return [0, 0, 1, 0]
    elif x % 3 == 0:
        return [0, 1, 0, 0]
    else:
        return [1, 0, 0, 0]


x_train = Tensor([binary_encode(x) for x in range(101, 1024)])
y_train = Tensor([fizz_buzz_encode(x) for x in range(101, 1024)])


class FizzBuzzModule(Module):
    def __init__(self, num_hidden: int = 50) -> None:
        self.w1 = Parameter(10, num_hidden)
        self.b1 = Parameter(num_hidden)
        self.w2 = Parameter(num_hidden, 4)
        self.b2 = Parameter(4)

    def predict(self, inputs: Tensor) -> Tensor:
        # inputs is (batch_size, 10)
        x1 = inputs @ self.w1 + self.b1  # (batch_size, num_hidden)
        x2 = tanh(x1)
def binary_encode(x: int) -> List[int]:
    return [x >> i & 1 for i in range(10)]


def fizz_buzz_encode(x: int) -> List[int]:
    if x % 15 == 0:
        return [0, 0, 0, 1]
    elif x % 5 == 0:
        return [0, 0, 1, 0]
    elif x % 3 == 0:
        return [0, 1, 0, 0]
    else:
        return [1, 0, 0, 0]


x_train = Tensor([binary_encode(x) for x in range(101, 1024)])
y_train = Tensor([fizz_buzz_encode(x) for x in range(101, 1024)])


class FizzBuzzModel(Module):
    def __init__(self, num_hidden: int = 50) -> None:
        self.fc1 = Linear(10, num_hidden)
        self.fc2 = Linear(num_hidden, 4)
        self.dropout = Dropout(0.1)

    def forward(self, inputs: Tensor) -> Tensor:
        return self.predict(inputs)

    def predict(self, inputs: Tensor) -> Tensor:
        # inputs will be (batch_size, 10)
        x1 = self.fc1(inputs)  # (batch_size, num_hidden)
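# Illustrative helper (not part of the repo) showing how the one-hot labels produced
# by fizz_buzz_encode above map back to the printed answers, plus a quick sanity check.
def fizz_buzz_decode(x: int, label: List[int]) -> str:
    return [str(x), "fizz", "buzz", "fizzbuzz"][label.index(1)]


assert fizz_buzz_encode(15) == [0, 0, 0, 1]
assert fizz_buzz_decode(15, fizz_buzz_encode(15)) == "fizzbuzz"
assert fizz_buzz_decode(10, fizz_buzz_encode(10)) == "buzz"
assert fizz_buzz_decode(7, fizz_buzz_encode(7)) == "7"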
import numpy as np

from autograd import Tensor, Parameter, Module
from autograd.optim import SGD


class LinearModel(Module):
    def __init__(self) -> None:
        self.w = Parameter(3)
        self.b = Parameter()

    def predict(self, inputs: Tensor) -> Tensor:
        """Learned function: y = Ax + b"""
        return inputs @ self.w + self.b


if __name__ == '__main__':
    x_data = Tensor(np.random.randn(100, 3))
    coef = Tensor(np.array([-1, 3, -2]))

    # The function to be learned is y = Ax + b + eps;
    # x_data is a tensor and so is y_data.
    # y_data = x_data @ coef + 5 + np.random.randint(-2, 2, size=(100,))
    # With a perfectly linear relationship we can drive the error to 0:
    y_data = x_data @ coef + 5

    optimizer = SGD(lr=1e-3)
    batch_size = 32
    model = LinearModel()

    for epoch in range(100):
        epoch_loss = 0.0
from autograd import Tensor

x = Tensor([[1., 2., 3.]])
w = Tensor([[2.], [3.], [4.]], requires_grad=True)
b = Tensor([.0], requires_grad=True)

y_ = x.matmul(w).add(b)
y = Tensor([60.])

loss = y_.sub(y).pow(Tensor(2.)).div(Tensor(2.))
print(loss.narray)

loss.backward()
print(w.grad)
print(b.grad)

# Gradient descent step: move the parameters against the gradient to reduce the loss
w.narray = w.narray - (0.001 * w.grad.T)
b.narray = b.narray - (0.001 * b.grad.T)

print(w.narray)
print(b.narray)