def loss(self, X: NPArray, y: NPIntArray) -> tuple[float, NPArray]:
    """
    Vectorized multiclass (structured) SVM hinge loss with margin delta = 1.

    With D input dimensions, C classes and a minibatch of N examples, the
    weights are read from self.W, expected to have shape (D, C).

    Inputs:
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c
      means that X[i] has label c, where 0 <= c < C.

    Returns a tuple of:
    - loss: the hinge loss summed over all examples and all incorrect
      classes. It is neither averaged over N nor regularized here.
    - dW: gradient of that summed loss with respect to self.W, with the
      same shape as self.W.
    """
    rows = np.arange(X.shape[0])
    scores = X.dot(self.W)

    # Column vector of each example's true-class score, so it broadcasts
    # across the class axis.
    true_scores = scores[rows, y][:, np.newaxis]

    # Hinge: max(0, s_j - s_y + delta) with delta = 1. The true class
    # itself never contributes to the loss.
    hinge = np.maximum(scores - true_scores + 1, 0)
    hinge[rows, y] = 0
    loss = np.sum(hinge)

    # Reuse the buffer as the per-score gradient coefficients: +1 for each
    # class that violates the margin, and minus the number of violations
    # in the true-class column.
    coeff = hinge
    coeff[coeff > 0] = 1
    coeff[rows, y] = -coeff.sum(axis=1)

    dW = X.T.dot(coeff)
    return loss, dW
def loss(self, X: NPArray, y: NPIntArray) -> tuple[float, NPArray]:
    """
    Softmax (cross-entropy) loss function, vectorized version.

    The weights are read from self.W, expected to have shape (D, C).

    Inputs:
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c
      means that X[i] has label c, where 0 <= c < C.

    Returns a tuple of:
    - loss: the negative log-likelihood of the true labels, summed over
      the minibatch (not averaged, not regularized). It is non-negative
      and decreases as the predictions improve.
    - dW: gradient of that loss with respect to self.W, with the same
      shape as self.W; a step along -dW lowers the loss.
    """
    num_classes = self.W.shape[1]
    num_train = X.shape[0]
    rows = np.arange(num_train)

    scores = X.dot(self.W)
    # NOTE(review): `softmax` is defined elsewhere; it is assumed to return
    # row-wise class probabilities of shape (N, C) -- confirm.
    probs = softmax(scores)

    # Cross-entropy is the NEGATIVE log-probability of the correct class.
    # Without the minus sign this would be the log-likelihood, which is a
    # quantity to maximize rather than a loss.
    loss = -np.sum(np.log(probs[rows, y]))

    one_hot = np.zeros((num_train, num_classes))
    one_hot[rows, y] = 1

    # d(loss)/d(scores) = probs - one_hot, chained through scores = X.W.
    dW = X.T.dot(probs - one_hot)
    return loss, dW
def backward(self, dout: NPArray) -> tuple[NPArray, ...]:
    """
    Backward pass of an affine (fully connected) layer.

    The layer input x is taken from self.cache (a one-element tuple) and
    the weights from self.w.

    Inputs:
    - dout: Upstream derivative, of shape (N, M)

    Uses:
    - self.cache: (x,) where x is the input data, of shape (N, d_1, ... d_k)
    - self.w: Weights, of shape (D, M)

    Returns a tuple of:
    - dx: Gradient with respect to x, of shape (N, d_1, ..., d_k)
    - dw: Gradient with respect to w, of shape (D, M)
    - db: Gradient with respect to b, of shape (M,)
    """
    [x] = self.cache

    # Flatten every example into a row so the input lines up with the
    # rows of w.
    batch_size = x.shape[0]
    x_rows = x.reshape(batch_size, -1)

    # Bias gradient: upstream gradient summed over the batch axis.
    db = dout.sum(axis=0)
    # Weight gradient: flattened inputs (transposed) times upstream gradient.
    dw = np.dot(x_rows.T, dout)
    # Input gradient, folded back into the original shape of x.
    dx = np.dot(dout, self.w.T).reshape(x.shape)

    return dx, dw, db