import numpy as np


def test_gradients():
    left = Tensor.from_builtin([[1, 2, 3], [3, 4, 1]])
    right = Tensor.from_builtin([[3], [1], [-1]])
    output = matrix_multiply(left, right)
    # Seed the backward pass with an upstream gradient of shape (2, 1).
    gradients = output.backward(np.array([[1], [-1]]))
    assert np.all(gradients[left] == [[3, 1, -1], [-3, -1, 1]])
    assert np.all(gradients[right] == [[-2], [-2], [2]])
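# For reference, the expected values follow the standard gradient rule for
# matrix multiplication: if output = left @ right, then
# grad_left = grad_output @ right.T and grad_right = left.T @ grad_output.
# A minimal NumPy sketch of that rule (matmul_gradients is a hypothetical
# helper written for illustration, not part of the library under test):
def matmul_gradients(left, right, grad_output):
    grad_left = grad_output @ right.T    # same shape as left: (2, 3)
    grad_right = left.T @ grad_output    # same shape as right: (3, 1)
    return grad_left, grad_right


# Plugging in the arrays from test_gradients reproduces the asserted values:
# grad_left == [[3, 1, -1], [-3, -1, 1]] and grad_right == [[-2], [-2], [2]].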
def test_tensor():
    assert (
        matrix_multiply(
            Tensor.from_builtin([[1, 2, 3], [3, 4, 1]]),
            Tensor.from_builtin([[3], [1], [-1]]),
        )
        == Tensor.from_builtin([[2], [12]])
    )
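# Worked check of the expected product, row by row:
#   row 1: 1*3 + 2*1 + 3*(-1) = 2
#   row 2: 3*3 + 4*1 + 1*(-1) = 12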
def compute_loss():
    # data, weights, biases, and targets come from the enclosing scope;
    # targets is assumed to be a one-hot (or probability) encoding.
    logits = matrix_multiply(data, weights) + biases
    probabilities = logits.softmax(-1)
    # Cross-entropy: negative log-likelihood of the target classes.
    return -(probabilities.log() * targets).sum()
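# In math terms, compute_loss evaluates the cross-entropy
#   L = -sum_i targets_i * log(softmax(logits)_i).
# A plain-NumPy reference for checking results (a sketch, not the library's
# implementation; it also applies the usual max-subtraction trick so the
# exponentials stay numerically stable):
def cross_entropy_reference(logits, targets):
    shifted = logits - logits.max(axis=-1, keepdims=True)
    probabilities = np.exp(shifted) / np.exp(shifted).sum(axis=-1, keepdims=True)
    return -(np.log(probabilities) * targets).sum()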
def __call__(self, input: Tensor) -> Tensor:
    # Affine transform followed by the layer's activation function.
    return self.activation(
        matrix_multiply(input, self.weights) + self.biases)
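# Hypothetical usage (the constructor signature, class name, and relu are
# assumptions for illustration; only __call__ appears in this excerpt):
#
#   layer = Layer(weights=w, biases=b, activation=relu)
#   hidden = layer(input_tensor)   # relu(input_tensor @ w + b)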