def sparse_categorical_crossentropy(out: Tensor, Y: np.ndarray) -> Tensor:
    """Sparse categorical cross-entropy of log-probabilities `out` against integer labels `Y`.

    A scaled one-hot target (-num_classes at each label index, 0 elsewhere) is
    built so that mean() over every element reproduces an NLL-style loss
    (torch's NLL loss yields one value per row).
    """
    num_classes = out.shape[-1]
    labels = Y.flatten()
    # One row of the identity per label, scaled: -num_classes at the label slot.
    onehot = np.eye(num_classes, dtype=np.float32)[labels] * (-1.0 * num_classes)
    target = Tensor(onehot.reshape(list(Y.shape) + [num_classes]))
    return out.mul(target).mean()
def test_tinygrad():
    """Run the reference forward/backward graph on CPU tensors.

    Returns the scalar output data plus the gradients of the three inputs.
    """
    a = Tensor(U_init)
    b = Tensor(V_init)
    c = Tensor(W_init)
    left = a.mul(b).relu()
    right = a.mul(c).relu()
    res = left.add(right).mul(right).relu()
    res = res.logsoftmax().sum()
    res.backward()
    return res.data, a.grad.data, b.grad.data, c.grad.data
def test_tinygrad():
    """Run the same forward/backward graph on `self.device` (closed over).

    Results and gradients are copied back to host via .cpu() before reading .data.
    """
    a = Tensor(U_init, device=self.device)
    b = Tensor(V_init, device=self.device)
    c = Tensor(W_init, device=self.device)
    left = a.mul(b).relu()
    right = a.mul(c).relu()
    res = left.add(right).mul(right).relu()
    res = res.logsoftmax().sum()
    res.backward()
    return (res.cpu().data,
            a.cpu().grad.data,
            b.cpu().grad.data,
            c.cpu().grad.data)
def test_nograd(self):
    """Gradients must be absent on requires_grad=False tensors and everything upstream of only them,
    and present along the requires_grad=True path."""
    data = Tensor(x_init, requires_grad=False)
    mask = Tensor(m_init, requires_grad=False)
    weight = Tensor(W_init, requires_grad=True)
    masked = data.mul(mask)
    prod = masked.matmul(weight)
    loss = prod.relu().sum()
    loss.backward()
    # No grad on the frozen inputs or their product.
    assert data.grad is None
    assert mask.grad is None
    assert masked.grad is None
    # Grad flows through the matmul because `weight` requires grad.
    assert prod.grad is not None
    assert weight.grad is not None