def apply(self, grad_output: Tensor) -> list:
    grad_input = []
    # d(t1 @ t2)/dt1 = grad_output @ t2.T; d(t1 @ t2)/dt2 = t1.T @ grad_output.
    if self.t2 is not None:
        grad_input.append(grad_output @ Tensor(self.t2.data.T))
    if self.t1 is not None:
        grad_input.append(Tensor(self.t1.data.T) @ grad_output)
    return grad_input
def main():
    coef = Tensor(np.array([1, 3, 2]))
    x_train = Tensor(np.random.rand(100, 3))
    y_train = x_train @ coef + 5
    x_test = Tensor(np.random.rand(20, 3))
    y_test = x_test @ coef + 5
    model = Model()
    train(model, x_train, y_train)
    test(model, x_test, y_test)
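# `Model`, `train`, and `test` are referenced above but not defined in this
# section. Below is a minimal sketch of what they might look like, assuming a
# single linear layer, the nn.MSELoss used in the tests further down, and a
# plain SGD update; the class layout, hyperparameters, and the grad-reset
# convention (`p.grad = None`) are all hypothetical, not the author's code.
class Model:
    def __init__(self):
        # One weight per feature plus a bias, matching the 3-feature data above.
        self.w = Tensor(np.random.rand(3), requires_grad=True)
        self.b = Tensor(np.random.rand(1), requires_grad=True)

    def forward(self, x: Tensor) -> Tensor:
        return x @ self.w + self.b


def train(model, x_train, y_train, lr=0.01, epochs=100):
    loss_fn = nn.MSELoss()
    for _ in range(epochs):
        loss = loss_fn(model.forward(x_train), y_train)
        loss.backward()
        # Plain SGD step on the raw ndarrays, then reset the gradients.
        for p in (model.w, model.b):
            p.data -= lr * p.grad.data
            p.grad = None


def test(model, x_test, y_test):
    loss = nn.MSELoss()(model.forward(x_test), y_test)
    print(f"test MSE: {loss.data}")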
def neg(t: Tensor) -> Tensor:
    data = -t.data
    requires_grad = t.requires_grad
    if requires_grad:
        neg_bw = NegBackward()
        neg_bw.set_next_edges(collect_next_edges(t))
        return Tensor(data=data, requires_grad=True, grad_fn=neg_bw)
    else:
        return Tensor(data=data)
def exp(t: Tensor) -> Tensor:
    data = np.exp(t.data)
    requires_grad = t.requires_grad
    if requires_grad:
        exp_bw = ExpBackward()
        exp_bw.set_next_edges(collect_next_edges(t))
        # Cache the output: d(e^x)/dx = e^x, so the backward pass reuses it.
        exp_bw.output = Tensor(data=data)
        return Tensor(data=data, requires_grad=True, grad_fn=exp_bw)
    else:
        return Tensor(data=data)
def relu(t: Tensor) -> Tensor:
    data = np.maximum(t.data, 0)
    requires_grad = t.requires_grad
    if requires_grad:
        relu_bw = ReluBackward()
        relu_bw.set_next_edges(collect_next_edges(t))
        # Cache the input: the backward pass masks the gradient by its sign.
        relu_bw.input = Tensor(data=t.data)
        return Tensor(data=data, requires_grad=True, grad_fn=relu_bw)
    else:
        return Tensor(data=data)
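# A quick sanity check of relu in the style of the unit tests further down:
# negative inputs are clamped to zero and block the gradient, while the
# cached-input mask passes the gradient through everywhere else.
t1 = Tensor([-1.0, 2.0], requires_grad=True)
t2 = relu(t1)
assert t2.data.tolist() == [0.0, 2.0]
t2.backward(Tensor([1.0, 1.0]))
assert t1.grad.data.tolist() == [0.0, 1.0]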
def t(t: Tensor) -> Tensor:  # transpose
    data = t.data.T
    requires_grad = t.requires_grad
    if requires_grad:
        t_bw = TBackward()
        t_bw.set_next_edges(collect_next_edges(t))
        return Tensor(data=data, requires_grad=True, grad_fn=t_bw)
    else:
        return Tensor(data=data)
def sum(t: Tensor, axis: Union[int, Tuple[int, ...]] = None) -> Tensor:
    data = t.data.sum(axis=axis)
    requires_grad = t.requires_grad
    if requires_grad:
        sum_bw = SumBackward()
        sum_bw.set_next_edges(collect_next_edges(t))
        sum_bw.axis = axis
        sum_bw.shape = t.shape
        return Tensor(data=data, requires_grad=True, grad_fn=sum_bw)
    else:
        return Tensor(data=data)
def mean(t: Tensor, axis: Union[int, Tuple[int, ...]] = None) -> Tensor:
    data = t.data.mean(axis=axis)
    requires_grad = t.requires_grad
    if requires_grad:
        mean_bw = MeanBackward()
        mean_bw.set_next_edges(collect_next_edges(t))
        mean_bw.axis = axis
        mean_bw.shape = t.shape
        return Tensor(data=data, requires_grad=True, grad_fn=mean_bw)
    else:
        return Tensor(data=data)
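# MeanBackward.apply is not shown in this section. A plausible sketch of it,
# assuming it mirrors SumBackward.apply (shown below) but scales by the number
# of elements averaged over; the `Function` base-class name and this body are
# illustrative assumptions, not the author's actual class.
class MeanBackward(Function):
    def apply(self, grad_output: Tensor) -> tuple:
        if isinstance(self.axis, int):
            self.axis = [self.axis]
        if self.axis is None:
            shape = [1] * len(self.shape)
            n = np.prod(self.shape)
        else:
            shape = [
                1 if i in self.axis else self.shape[i]
                for i in range(len(self.shape))
            ]
            n = np.prod([self.shape[i] for i in self.axis])
        # Each input element contributes 1/n to the mean.
        data = grad_output.data.reshape(shape) / n + np.zeros(self.shape)
        return (Tensor(data=data),)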
def pow(t1: Tensor, t2: float) -> Tensor:
    data = t1.data ** t2
    requires_grad = t1.requires_grad
    if requires_grad:
        pow_bw = PowBackward()
        pow_bw.set_next_edges(collect_next_edges(t1))
        # Cache the base: d(x ** n)/dx = n * x ** (n - 1).
        pow_bw.t1 = Tensor(data=t1.data)
        pow_bw.t2 = t2
        return Tensor(data=data, requires_grad=True, grad_fn=pow_bw)
    else:
        return Tensor(data=data)
def add(t1: Tensor, t2: Tensor) -> Tensor:
    data = t1.data + t2.data
    requires_grad = t1.requires_grad or t2.requires_grad
    if requires_grad:
        add_bw = AddBackward()
        add_bw.set_next_edges(collect_next_edges(t1, t2))
        if t1.requires_grad:
            add_bw.t1_shape = t1.shape
        if t2.requires_grad:
            add_bw.t2_shape = t2.shape
        return Tensor(data=data, requires_grad=True, grad_fn=add_bw)
    else:
        return Tensor(data=data)
def matmul(t1: Tensor, t2: Tensor) -> Tensor:
    data = t1.data @ t2.data
    requires_grad = t1.requires_grad or t2.requires_grad
    if requires_grad:
        matmul_bw = MatMulBackward()
        matmul_bw.set_next_edges(collect_next_edges(t1, t2))
        # The gradient w.r.t. each operand needs the *other* operand's value.
        if t1.requires_grad:
            matmul_bw.t2 = t2
        if t2.requires_grad:
            matmul_bw.t1 = t1
        return Tensor(data=data, requires_grad=True, grad_fn=matmul_bw)
    else:
        return Tensor(data=data)
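# The matmul gradients follow the standard identities
# d(t1 @ t2)/dt1 = grad_output @ t2.T and d(t1 @ t2)/dt2 = t1.T @ grad_output.
# A small numeric check in the style of the unit tests further down:
t1 = Tensor([[1.0, 2.0]], requires_grad=True)    # shape (1, 2)
t2 = Tensor([[3.0], [4.0]], requires_grad=True)  # shape (2, 1)
t3 = t1 @ t2                                     # shape (1, 1): [[11.0]]
t3.backward(Tensor([[1.0]]))
assert t1.grad.data.tolist() == [[3.0, 4.0]]     # grad_output @ t2.T
assert t2.grad.data.tolist() == [[1.0], [2.0]]   # t1.T @ grad_output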
def sub(t1: Tensor, t2: Tensor) -> Tensor:
    data = t1.data - t2.data
    requires_grad = t1.requires_grad or t2.requires_grad
    if requires_grad:
        sub_bw = SubBackward()
        sub_bw.set_next_edges(collect_next_edges(t1, t2))
        if t1.requires_grad:
            sub_bw.t1_shape = t1.shape
        if t2.requires_grad:
            sub_bw.t2_shape = t2.shape
        return Tensor(data=data, requires_grad=True, grad_fn=sub_bw)
    else:
        return Tensor(data=data)
def interface_permute(tensor: Tensor, hide_function_defs: bool):
    if not hide_function_defs:
        with st.beta_expander("Show function definition"):
            render_function(TensorData.permute)
    st.write(f"**Tensor strides:** {tensor._tensor.strides}")
    default_permutation = list(range(len(tensor.shape)))
    default_permutation.reverse()
    permutation = eval(
        st.text_input("Tensor permutation", value=default_permutation)
    )
    p_tensor = tensor.permute(*permutation)
    p_tensor_strides = p_tensor._tensor.strides
    st.write(f"**Permuted tensor strides:** {p_tensor_strides}")
    st.write("**Try selecting a tensor value by index:**")
    out_index = st_select_index(tensor.shape)
    viz_type = st.selectbox(
        "Choose tensor visualization",
        options=["Original tensor", "Permuted tensor"],
    )
    if viz_type == "Original tensor":
        viz_tensor = tensor
    else:
        viz_tensor = p_tensor
    st_visualize_tensor(viz_tensor, out_index, show_value=False)
    st_visualize_storage(
        tensor, index_to_position(out_index, viz_tensor._tensor.strides)
    )
def mul(t1: Tensor, t2: Tensor) -> Tensor:
    data = t1.data * t2.data
    requires_grad = t1.requires_grad or t2.requires_grad
    if requires_grad:
        mul_bw = MulBackward()
        mul_bw.set_next_edges(collect_next_edges(t1, t2))
        if t1.requires_grad:
            mul_bw.t2 = Tensor(data=t2.data)
            mul_bw.t1_shape = t1.shape
        if t2.requires_grad:
            mul_bw.t1 = Tensor(data=t1.data)
            mul_bw.t2_shape = t2.shape
        return Tensor(data=data, requires_grad=True, grad_fn=mul_bw)
    else:
        return Tensor(data=data)
def div(t1: Tensor, t2: Tensor) -> Tensor:
    data = t1.data / t2.data
    requires_grad = t1.requires_grad or t2.requires_grad
    if requires_grad:
        div_bw = DivBackward()
        div_bw.set_next_edges(collect_next_edges(t1, t2))
        if t1.requires_grad:
            # d(t1 / t2)/dt1 = 1 / t2, so only t2 is needed here.
            div_bw.t2 = Tensor(data=t2.data)
            div_bw.t1_shape = t1.shape
        if t2.requires_grad:
            # d(t1 / t2)/dt2 = -t1 / t2 ** 2, so both operands are needed;
            # t2 may already have been cached by the branch above.
            div_bw.t1 = Tensor(data=t1.data)
            div_bw.t2 = Tensor(data=t2.data) if div_bw.t2 is None else div_bw.t2
            div_bw.t2_shape = t2.shape
        return Tensor(data=data, requires_grad=True, grad_fn=div_bw)
    else:
        return Tensor(data=data)
def test_neg(self):
    # scalar neg
    t1 = Tensor(1.0)
    t2 = -t1
    self.assertEqual(t2.data.tolist(), -1.0)

    t1 = Tensor(2.0, requires_grad=True)
    t2 = -t1
    t2.backward()
    self.assertEqual(t1.grad.data.tolist(), -1.0)

    # vector neg
    t1 = Tensor([1.0, 2.0])
    t2 = -t1
    self.assertEqual(t2.data.tolist(), [-1.0, -2.0])

    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = -t1
    t2.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [-1.0, -1.0])
def test_pow(self):
    # scalar pow
    t1 = Tensor(2.0)
    t2 = t1 ** 3
    self.assertEqual(t2.data.tolist(), 8.0)

    t1 = Tensor(2.0, requires_grad=True)
    t2 = t1 ** 3
    t2.backward()
    self.assertEqual(t1.grad.data.tolist(), 12.0)

    # vector pow
    t1 = Tensor([1.0, 2.0])
    t2 = t1 ** 3
    self.assertEqual(t2.data.tolist(), [1.0, 8.0])

    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = t1 ** 3
    t2.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [3.0, 12.0])
def apply(self, grad_output: Tensor) -> tuple:
    if isinstance(self.axis, int):
        self.axis = [self.axis]
    # Reshape the gradient to a shape that broadcasts against the input:
    # reduced axes become size 1, the rest keep their original size.
    if self.axis is None:
        shape = [1] * len(self.shape)
    else:
        shape = [
            1 if i in self.axis else self.shape[i]
            for i in range(len(self.shape))
        ]
    # Adding zeros of the input shape broadcasts the gradient back up.
    data = grad_output.data.reshape(shape) + np.zeros(self.shape)
    return (Tensor(data=data),)
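# How the reshape-then-add-zeros trick above expands a reduced gradient back
# to the input's shape, shown directly in NumPy for a (2, 3) -> (3,) sum over
# axis=0:
g = np.array([1.0, 1.0, 1.0])
expanded = g.reshape([1, 3]) + np.zeros((2, 3))
assert expanded.tolist() == [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]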
def render_tensor_sandbox(hide_function_defs: bool):
    st.write("## Sandbox for Tensors")
    st.write("**Define your tensor**")
    # Consistent random number generator
    rng = np.random.RandomState(42)
    # col1, col2 = st.beta_columns(2)
    tensor_shape = st_eval_error_message(
        st.text_input("Tensor shape", value="(2, 2, 2)"),
        "Tensor shape must be defined as an in-line tuple, i.e. (2, 2, 2)",
    )
    tensor_size = int(operators.prod(tensor_shape))
    random_tensor = st.checkbox("Fill tensor with random numbers", value=True)
    if random_tensor:
        tensor_data = np.round(rng.rand(tensor_size), 2)
        st.write("**Tensor data storage:**")
        # Visualize horizontally
        st.write(tensor_data.reshape(1, -1))
    else:
        tensor_data = st_eval_error_message(
            st.text_input(
                "Tensor data storage", value=str(list(range(tensor_size)))
            ),
            "Tensor data storage must be defined as an in-line list, i.e. [1, 2, 3, 4]",
        )
    try:
        test_tensor = Tensor.make(tensor_data, tensor_shape, backend=TensorFunctions)
    except AssertionError as e:
        storage_size = len(tensor_data)
        if tensor_size != storage_size:
            st.error(
                f"Tensor data storage must define all values in shape "
                f"({tensor_size} != {storage_size})"
            )
        else:
            st.error(e)
        return
    select_fn = {
        "Visualize Tensor Definition": interface_visualize_tensor,
        "Visualize Tensor Strides": interface_strides,
        "function: index_to_position": interface_index_to_position,
        "function: to_index": interface_to_index,
        "function: TensorData.permute": interface_permute,
    }
    selected_fn = st.selectbox("Select an interface", options=list(select_fn.keys()))
    select_fn[selected_fn](test_tensor, hide_function_defs)
def unbroadcast(grad_input: Tensor, input_shape: tuple) -> Tensor:
    """When broadcasting is applied in the forward pass, the gradient must be
    "unbroadcast" (summed back down to the input's shape) when backpropagating.

    References:
        1. https://numpy.org/doc/stable/user/basics.broadcasting.html
        2. http://coldattic.info/post/116/
        3. https://github.com/joelgrus/autograd/blob/part06/autograd/tensor.py#L150
    """
    if grad_input.shape == input_shape:
        return grad_input
    data = grad_input.data
    # Sum out the leading dimensions that broadcasting prepended.
    ndims_added = len(grad_input.shape) - len(input_shape)
    for _ in range(ndims_added):
        data = data.sum(axis=0)
    # Sum over dimensions that were stretched from size 1.
    for i, dim in enumerate(input_shape):
        if dim == 1:
            data = data.sum(axis=i, keepdims=True)
    return Tensor(data=data)
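# Example: a gradient computed at the broadcast shape (2, 3) is summed back
# down to an original (1, 3) input shape. Both reductions the function can
# perform are visible here via the second loop (dim == 1).
g = Tensor(np.ones((2, 3)))
reduced = unbroadcast(g, (1, 3))
assert reduced.data.tolist() == [[2.0, 2.0, 2.0]]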
def test_linear(self):
    input = Tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
    target = Tensor([[6, 7, 8, 9, 10], [1, 2, 3, 4, 5]])
    loss = nn.MSELoss()
    # Every prediction is off by 5, so the mean squared error is 5 ** 2 = 25.
    output = loss(input, target)
    self.assertEqual(output.data.tolist(), 25.)
def test_simple_mul(self):
    # scalar mul
    t1 = Tensor(1.0)
    t2 = Tensor(2.0)
    t3 = t1 * t2
    self.assertEqual(t3.data.tolist(), 2.0)

    t1 = Tensor(1.0, requires_grad=True)
    t2 = Tensor(2.0)
    t3 = t1 * t2
    t3.backward()
    self.assertEqual(t1.grad.data.tolist(), 2.0)

    t1 = Tensor(1.0)
    t2 = Tensor(2.0, requires_grad=True)
    t3 = t1 * t2
    t3.backward()
    self.assertEqual(t2.grad.data.tolist(), 1.0)

    t1 = Tensor(1.0, requires_grad=True)
    t2 = Tensor(2.0, requires_grad=True)
    t3 = t1 * t2
    t3.backward()
    self.assertEqual(t1.grad.data.tolist(), 2.0)
    self.assertEqual(t2.grad.data.tolist(), 1.0)

    # vector mul
    t1 = Tensor([1.0, 2.0])
    t2 = Tensor([2.0, 3.0])
    t3 = t1 * t2
    self.assertEqual(t3.data.tolist(), [2.0, 6.0])

    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = Tensor([2.0, 3.0])
    t3 = t1 * t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [2.0, 3.0])

    t1 = Tensor([1.0, 2.0])
    t2 = Tensor([2.0, 3.0], requires_grad=True)
    t3 = t1 * t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t2.grad.data.tolist(), [1.0, 2.0])

    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = Tensor([2.0, 3.0], requires_grad=True)
    t3 = t1 * t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [2.0, 3.0])
    self.assertEqual(t2.grad.data.tolist(), [1.0, 2.0])
def test_broadcast_mul(self):
    # (2,) * ()
    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = Tensor(2.0, requires_grad=True)
    t3 = t1 * t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [2.0, 2.0])
    self.assertEqual(t2.grad.data.tolist(), 3.0)

    # (2,) * (1,)
    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = Tensor([2.0], requires_grad=True)
    t3 = t1 * t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [2.0, 2.0])
    self.assertEqual(t2.grad.data.tolist(), [3.0])

    # (2, 2) * ()
    t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
    t2 = Tensor(2.0, requires_grad=True)
    t3 = t1 * t2
    t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]]))
    self.assertEqual(t1.grad.data.tolist(), [[2.0, 2.0], [2.0, 2.0]])
    self.assertEqual(t2.grad.data.tolist(), 10.0)

    # (2, 2) * (1,)
    t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
    t2 = Tensor([2.0], requires_grad=True)
    t3 = t1 * t2
    t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]]))
    self.assertEqual(t1.grad.data.tolist(), [[2.0, 2.0], [2.0, 2.0]])
    self.assertEqual(t2.grad.data.tolist(), [10.0])

    # (2, 2) * (2,)
    t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
    t2 = Tensor([2.0, 3.0], requires_grad=True)
    t3 = t1 * t2
    t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]]))
    self.assertEqual(t1.grad.data.tolist(), [[2.0, 3.0], [2.0, 3.0]])
    self.assertEqual(t2.grad.data.tolist(), [4.0, 6.0])
def test_exp(self):
    # scalar exp
    t1 = Tensor(2.0)
    t2 = t1.exp()
    np.testing.assert_allclose(t2.data, np.exp(2))

    t1 = Tensor(2.0, requires_grad=True)
    t2 = t1.exp()
    t2.backward()
    np.testing.assert_allclose(t1.grad.data, np.exp(2))

    # vector exp
    t1 = Tensor([1.0, 2.0])
    t2 = t1.exp()
    np.testing.assert_allclose(t2.data, np.exp([1, 2]))

    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = t1.exp()
    t2.backward(Tensor([1.0, 1.0]))
    np.testing.assert_allclose(t1.grad.data, np.exp([1, 2]))
def apply(self, grad_output: Tensor) -> tuple:
    # Pass the gradient through where the input was non-negative and block it
    # elsewhere (the mask picks the subgradient 1 at exactly 0).
    return (grad_output * Tensor(data=(self.input.data >= 0)),)
def apply(self, grad_output: Tensor) -> tuple:
    # The gradient of a transpose is the transposed gradient.
    return (Tensor(data=grad_output.data.T),)
def test_broadcast_sub(self):
    # (2,) - ()
    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = Tensor(2.0, requires_grad=True)
    t3 = t1 - t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0])
    self.assertEqual(t2.grad.data.tolist(), -2.0)

    # (2,) - (1,)
    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = Tensor([2.0], requires_grad=True)
    t3 = t1 - t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0])
    self.assertEqual(t2.grad.data.tolist(), [-2.0])

    # (2, 2) - ()
    t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
    t2 = Tensor(2.0, requires_grad=True)
    t3 = t1 - t2
    t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]]))
    self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]])
    self.assertEqual(t2.grad.data.tolist(), -4.0)

    # (2, 2) - (1,)
    t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
    t2 = Tensor([2.0], requires_grad=True)
    t3 = t1 - t2
    t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]]))
    self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]])
    self.assertEqual(t2.grad.data.tolist(), [-4.0])

    # (2, 2) - (2,)
    t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
    t2 = Tensor([2.0, 3.0], requires_grad=True)
    t3 = t1 - t2
    t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]]))
    self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]])
    self.assertEqual(t2.grad.data.tolist(), [-2.0, -2.0])
def test_sum(self):
    t1 = Tensor([1., 2., 3.])
    t2 = t1.sum()
    self.assertEqual(t2.data.tolist(), 6.)

    # (3,) -> ()
    t1 = Tensor([1., 2., 3.], requires_grad=True)
    t2 = t1.sum()
    t2.backward()
    self.assertEqual(t1.grad.data.tolist(), [1., 1., 1.])

    # (2, 3) -> (3,)
    t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True)
    t2 = t1.sum(axis=0)
    t2.backward(Tensor([1., 1., 1.]))
    self.assertEqual(t1.grad.data.tolist(), [[1., 1., 1.], [1., 1., 1.]])

    # (2, 3) -> (2,)
    t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True)
    t2 = t1.sum(axis=1)
    t2.backward(Tensor([1., 1.]))
    self.assertEqual(t1.grad.data.tolist(), [[1., 1., 1.], [1., 1., 1.]])

    # (2, 3) -> ()
    t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True)
    t2 = t1.sum()
    t2.backward(Tensor(1.0))
    self.assertEqual(t1.grad.data.tolist(), [[1., 1., 1.], [1., 1., 1.]])
def test_simple_div(self):
    # scalar div
    t1 = Tensor(1.0)
    t2 = Tensor(2.0)
    t3 = t1 / t2
    self.assertEqual(t3.data.tolist(), 0.5)

    t1 = Tensor(1.0, requires_grad=True)
    t2 = Tensor(2.0)
    t3 = t1 / t2
    t3.backward()
    self.assertEqual(t1.grad.data.tolist(), 0.5)

    t1 = Tensor(1.0)
    t2 = Tensor(2.0, requires_grad=True)
    t3 = t1 / t2
    t3.backward()
    self.assertEqual(t2.grad.data.tolist(), -0.25)

    t1 = Tensor(1.0, requires_grad=True)
    t2 = Tensor(2.0, requires_grad=True)
    t3 = t1 / t2
    t3.backward()
    self.assertEqual(t1.grad.data.tolist(), 0.5)
    self.assertEqual(t2.grad.data.tolist(), -0.25)

    # vector div
    t1 = Tensor([1.0, 2.0])
    t2 = Tensor([2.0, 4.0])
    t3 = t1 / t2
    self.assertEqual(t3.data.tolist(), [0.5, 0.5])

    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = Tensor([2.0, 4.0])
    t3 = t1 / t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [0.5, 0.25])

    t1 = Tensor([1.0, 2.0])
    t2 = Tensor([2.0, 4.0], requires_grad=True)
    t3 = t1 / t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t2.grad.data.tolist(), [-0.25, -1 / 8])

    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = Tensor([2.0, 4.0], requires_grad=True)
    t3 = t1 / t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [0.5, 0.25])
    self.assertEqual(t2.grad.data.tolist(), [-0.25, -1 / 8])
def test_broadcast_div(self):
    # (2,) / ()
    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = Tensor(2.0, requires_grad=True)
    t3 = t1 / t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [0.5, 0.5])
    self.assertEqual(t2.grad.data.tolist(), -0.75)

    # (2,) / (1,)
    t1 = Tensor([1.0, 2.0], requires_grad=True)
    t2 = Tensor([2.0], requires_grad=True)
    t3 = t1 / t2
    t3.backward(Tensor([1.0, 1.0]))
    self.assertEqual(t1.grad.data.tolist(), [0.5, 0.5])
    self.assertEqual(t2.grad.data.tolist(), [-0.75])

    # (2, 2) / ()
    t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
    t2 = Tensor(2.0, requires_grad=True)
    t3 = t1 / t2
    t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]]))
    self.assertEqual(t1.grad.data.tolist(), [[0.5, 0.5], [0.5, 0.5]])
    self.assertEqual(t2.grad.data.tolist(), -2.5)

    # (2, 2) / (1,)
    t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
    t2 = Tensor([2.0], requires_grad=True)
    t3 = t1 / t2
    t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]]))
    self.assertEqual(t1.grad.data.tolist(), [[0.5, 0.5], [0.5, 0.5]])
    self.assertEqual(t2.grad.data.tolist(), [-2.5])

    # (2, 2) / (2,)
    t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
    t2 = Tensor([2.0, 4.0], requires_grad=True)
    t3 = t1 / t2
    t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]]))
    self.assertEqual(t1.grad.data.tolist(), [[0.5, 0.25], [0.5, 0.25]])
    self.assertEqual(t2.grad.data.tolist(), [-1.0, -0.375])