def grad_f_params(self, x, y=1):
    """Derivative of the decision function w.r.t. the classifier parameters.

    Parameters
    ----------
    x : CArray
        Samples at which the derivative is evaluated. They are passed
        through `self.preprocess` (when one is set) before the kernel
        is computed.
    y : int
        Index of the class wrt the gradient must be computed.

    Returns
    -------
    CArray or None
        Array built with shape (s + 1, k), where s is the number of margin
        support vectors and k the number of rows of `x`. The first s rows
        hold the kernel between margin support vectors and samples; the
        last row is left at one (presumably the bias term - confirm).
        None is returned when `sv_margin()` yields no support vectors.

    """
    # The second element returned by sv_margin() is not needed here.
    margin_sv, _ = self.sv_margin()  # these points are already normalized

    if margin_sv is None:
        self.logger.debug("Warning: sv_margin is empty "
                          "(all points are error vectors).")
        return None

    samples = x
    if self.preprocess is not None:
        samples = self.preprocess.transform(x)

    n_margin_sv = margin_sv.shape[0]
    n_samples = samples.shape[0]

    # Start from all ones so that the extra (last) row is already filled.
    grad = CArray.ones(shape=(n_margin_sv + 1, n_samples))
    grad[:n_margin_sv, :] = self.kernel.k(margin_sv, samples)

    return convert_binary_labels(y) * grad  # (s + 1) * k
def dloss(self, y_true, score, pos_label=1):
    """Computes the derivative of the square loss function with respect
    to `score`.

    The value returned is ``-2 * y * (1 - y * score)``, with the labels
    ``y`` first converted by `convert_binary_labels`.

    Parameters
    ----------
    y_true : CArray
        Ground truth (correct), targets. Vector-like array.
    score : CArray
        Outputs (predicted), targets.
        2-D array of shape (n_samples, n_classes) or 1-D flat array
        of shape (n_samples,). If 1-D array, the probabilities
        provided are assumed to be that of the positive class.
    pos_label : {0, 1}, optional
        The class wrt compute the loss function derivative. Default 1.
        If `score` is a 1-D flat array, this parameter is ignored.

    Returns
    -------
    CArray
        Derivative of the loss function. Vector-like array.

    Raises
    ------
    ValueError
        If `pos_label` is neither 0 nor 1.

    """
    if pos_label not in (0, 1):
        raise ValueError("only {0, 1} are accepted for `pos_label`")

    signed_labels = convert_binary_labels(y_true).ravel()  # now in {-1, 1}
    class_score = _check_binary_score(score, pos_label)

    residual = 1.0 - signed_labels * class_score
    return -2.0 * signed_labels * residual
def grad_f_params(self, x, y=1):
    """Derivative of the decision function w.r.t. alpha and b

    Parameters
    ----------
    x : CArray
        Samples on which the training objective is computed.
        They are used as-is (no preprocessing is applied here).
    y : int
        Index of the class wrt the gradient must be computed.

    Returns
    -------
    CArray or None
        Array built with shape (s + 1, k), where s is the number of margin
        support vectors and k the number of rows of `x`. The first s rows
        hold the transposed kernel output; the last row is left at one.
        None is returned when `_sv_margin()` yields no support vectors.

    """
    xs, _ = self._sv_margin()  # these points are already preprocessed

    if xs is None:
        self.logger.debug("Warning: sv_margin is empty "
                          "(all points are error vectors).")
        return None

    s = xs.shape[0]  # margin support vector
    k = x.shape[0]

    Ksk_ext = CArray.ones(shape=(s + 1, k))

    # The kernel reference vectors are swapped temporarily for the margin
    # support vectors. The previous set is restored in `finally` so that a
    # failure inside `forward` cannot leave the kernel in a modified state.
    sv = self.kernel.rv
    self.kernel.rv = xs
    try:
        Ksk_ext[:s, :] = self.kernel.forward(x).T  # x and xs are preprocessed
    finally:
        self.kernel.rv = sv

    return convert_binary_labels(y) * Ksk_ext  # (s + 1) * k
def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None):
    """
    Derivative of the classifier's discriminant function f(xk)
    computed on a set of points xk w.r.t. a single poisoning point xc

    This is a classifier-specific implementation, so we delegate its
    implementation to inherited classes.

    Parameters
    ----------
    xc : CArray
        The poisoning point.
    yc : CArray or scalar
        Label of the poisoning point; converted with
        `convert_binary_labels`.
    clf : classifier
        Must expose either `C` or `alpha` (in which case ``C = 1 / alpha``),
        plus `w`, `b`, `preprocess` and `hessian_tr_params`.
    loss_grad : CArray
        Gradient of the loss w.r.t. the scores on the validation points.
    tr : dataset
        Training set; `tr.X` and `tr.Y` are passed to
        `clf.hessian_tr_params`.
    k : optional
        Not used by this implementation.

    Returns
    -------
    CArray
        Output of `_compute_grad_inv`, mapped back through
        `clf.preprocess.gradient` when a preprocessor is set.

    Raises
    ------
    ValueError
        If `clf` has neither a `C` nor an `alpha` attribute.

    """
    xc0 = xc.deepcopy()

    if hasattr(clf, 'C'):
        C = clf.C
    elif hasattr(clf, 'alpha'):
        C = 1.0 / clf.alpha
    else:
        raise ValueError("Error: The classifier does not have neither C "
                         "nor alpha")

    H = clf.hessian_tr_params(tr.X, tr.Y)

    # change vector dimensions to match the mathematical formulation...
    yc = convert_binary_labels(yc)
    xc = CArray(xc.ravel()).atleast_2d()  # xc is a row vector
    w = CArray(clf.w.ravel()).T  # column vector
    b = clf.b
    grad_loss_fk = CArray(loss_grad.ravel()).T  # column vector

    # validation points
    xk = self.val.X.atleast_2d()

    # handle normalizer, if present
    xc = xc if clf.preprocess is None else clf.preprocess.transform(xc)

    # Number of features is read AFTER preprocessing: the identity below is
    # summed with w.dot(xc), which lives in the transformed feature space.
    d = xc.size

    s_c = self._s(xc, w, b)
    sigm_c = self._sigm(yc, s_c)
    z_c = sigm_c * (1 - sigm_c)

    dbx_c = z_c * w  # column vector
    dwx_c = ((yc * (-1 + sigm_c)) * CArray.eye(d, d)) + \
        z_c * (w.dot(xc))  # matrix d*d

    G = C * (dwx_c.append(dbx_c, axis=1))

    fd_params = self.classifier.grad_f_params(xk)
    grad_loss_params = fd_params.dot(grad_loss_fk)

    # Alternatives available on the class: _compute_grad_solve and
    # _compute_grad_solve_iterative (same arguments).
    gt = self._compute_grad_inv(G, H, grad_loss_params)

    # propagating gradient back to input space
    if clf.preprocess is not None:
        return clf.preprocess.gradient(xc0, w=gt)

    return gt
def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None):
    """
    Derivative of the classifier's discriminant function f(xk)
    computed on a set of points xk w.r.t. a single poisoning point xc

    This is a classifier-specific implementation, so we delegate its
    implementation to inherited classes.

    Parameters
    ----------
    xc : CArray
        The poisoning point.
    yc : CArray or scalar
        Label of the poisoning point; converted with
        `convert_binary_labels`.
    clf : classifier
        Must expose `w`, `b`, `preprocess` and `hessian_tr_params`.
    loss_grad : CArray
        Gradient of the loss w.r.t. the scores on the validation points.
    tr : dataset
        Training set; `tr.X` is passed to `clf.hessian_tr_params`.
    k : optional
        Not used by this implementation.

    Returns
    -------
    CArray
        Output of `_compute_grad_inv` (gradient in feature space), mapped
        back through `clf.preprocess.gradient` when a preprocessor is set.

    """
    # NOTE: no check is performed here, but convert_binary_labels should
    # not be called when y is continuous (regression problems).
    yc = convert_binary_labels(yc)

    xc0 = xc.deepcopy()

    # take validation points (left untransformed in this method)
    xk = self._val.X.atleast_2d()
    x = tr.X.atleast_2d()

    H = clf.hessian_tr_params(x)

    grad_loss_fk = CArray(loss_grad.ravel()).T  # column vector

    # handle normalizer, if present
    xc = xc if clf.preprocess is None else clf.preprocess.transform(xc)
    xc = xc.ravel().atleast_2d()  # row vector

    # Number of features of the (preprocessed) poisoning point: this is the
    # space in which M below is built, so the identity must match it.
    d = xc.shape[1]

    # clf.w.T.dot(xc) is used as an outer product (xc is a row here)
    M = clf.w.T.dot(xc)
    M += (clf.w.dot(xc.T) + clf.b - yc) * CArray.eye(d)
    db_xc = clf.w.T
    G = M.append(db_xc, axis=1)

    # add diagonal noise to the matrix that we are going to invert
    H += 1e-9 * (CArray.eye(d + 1))

    # compute the derivatives of the classifier discriminant function
    fd_params = self.classifier.grad_f_params(xk)
    grad_loss_params = fd_params.dot(grad_loss_fk)

    # gt is the gradient in feature space.
    # Alternatives available on the class: _compute_grad_solve and
    # _compute_grad_solve_iterative (same arguments).
    gt = self._compute_grad_inv(G, H, grad_loss_params)

    # propagating gradient back to input space
    if clf.preprocess is not None:
        return clf.preprocess.gradient(xc0, w=gt)

    return gt
def dloss(self, y_true, score, pos_label=1, bound=10):
    """Computes the derivative of the logistic loss function with respect
    to `score`.

    The loss differentiated here is ``log(1 + exp(-y * s)) / log(2)``,
    whose derivative w.r.t. ``s`` is
    ``-y * exp(-y * s) / (1 + exp(-y * s)) / log(2)``.

    Parameters
    ----------
    y_true : CArray
        Ground truth (correct), targets. Vector-like array.
    score : CArray
        Outputs (predicted), targets.
        2-D array of shape (n_samples, n_classes) or 1-D flat array
        of shape (n_samples,). If 1-D array, the probabilities
        provided are assumed to be that of the positive class.
    pos_label : {0, 1}, optional
        The class wrt compute the loss function derivative. Default 1.
        If `score` is a 1-D flat array, this parameter is ignored.
    bound : scalar or None, optional
        Set an upper bound for a linear approximation when -y*s is large
        to avoid numerical overflows.
        10 is a generally acceptable -> log(1+exp(10)) = 10.000045
        With None the exact expression is used for every sample.

    Returns
    -------
    CArray
        Derivative of the loss function. Vector-like array.

    Raises
    ------
    ValueError
        If `pos_label` is neither 0 nor 1.

    """
    if pos_label not in (0, 1):
        raise ValueError("only {0, 1} are accepted for `pos_label`")

    y_true = convert_binary_labels(y_true).ravel()  # Convert to {-1, 1}
    score = _check_binary_score(score, pos_label)

    # v = -y * f, the argument of the exponential
    v = CArray(-y_true * score).astype(float)

    if bound is None:
        exp_v = v.exp()
        h = -y_true * exp_v / (1.0 + exp_v)
    else:
        # linear approximation avoids numerical overflows
        # when -yf >> 1 : loss ~= -yf, and grad = -y
        h = -y_true.astype(float)
        # Only entries below the bound get the exact expression; the mask
        # and the exponential are computed once and reused.
        below = v < bound
        exp_v = v[below].exp()
        h[below] = h[below] * exp_v / (1.0 + exp_v)

    # division by log(2), as in the loss definition above
    return h / CArray([2]).log()
def hessian_tr_params(self, x, y):
    """Hessian of the training objective w.r.t. the classifier parameters.

    Parameters
    ----------
    x : CArray
        Features of the dataset on which the training objective is
        computed.
    y : CArray
        Dataset labels.

    Returns
    -------
    CArray
        Matrix of shape (d + 1, d + 1), d being the number of features
        after preprocessing. The top-left d x d block holds the second
        derivative w.r.t. w, the last column/row the mixed w-b term and
        the bottom-right entry the second derivative w.r.t. b.

    """
    y = y.ravel()
    y = convert_binary_labels(y)
    y = CArray(y).astype(float).T  # column vector

    C = self.C

    x = x.atleast_2d()

    # nb: we compute the score before the x normalization as decision
    # function normalizes x
    s = self.decision_function(x, y=1).T
    sigm = self._sigm(y, s)
    z = sigm * (1 - sigm)

    # handle normalizer, if present
    x = x if self.preprocess is None else self.preprocess.transform(x)

    d = x.shape[1]  # number of features in the normalized space

    # Each sample (row of x) scaled by its z. This gives x.T * diag(z) * x
    # without materialising an n x n diagonal matrix, and is reused for
    # the mixed w-b term below.
    zx = z * x

    # the identity term is presumably the contribution of the
    # regulariser - confirm against the training objective
    dww = C * (x.T.dot(zx)) + CArray.eye(d, d)  # matrix d*d
    # first derivative wrt b derived w.r.t. w
    dbw = C * (zx.sum(axis=0)).T  # column vector
    dbb = C * (z.sum(axis=None))  # scalar

    H = CArray.zeros((d + 1, d + 1))
    H[:d, :d] = dww
    H[:-1, d] = dbw
    H[d, :-1] = dbw.T
    H[-1, -1] = dbb

    return H
def grad_f_params(self, x, y=1):
    """Derivative of the decision function w.r.t. the classifier parameters.

    Parameters
    ----------
    x : CArray
        Features of the dataset on which the training objective is
        computed. Transformed with `self.preprocess` when one is set.
    y : int
        Index of the class wrt the gradient must be computed.

    Returns
    -------
    CArray
        Output of `_grad_f_w` with the output of `_grad_f_b` appended
        along axis 0, multiplied by `convert_binary_labels(y)`.

    """
    samples = x if self.preprocess is None else self.preprocess.transform(x)

    # derivative wrt the weights first, the bias part stacked after it
    stacked = self._grad_f_w(samples).append(
        self._grad_f_b(samples), axis=0)

    return convert_binary_labels(y) * stacked
def _fit(self, dataset):
    """Trains the SVM classifier on a binary dataset.

    With a linear kernel the samples are given to `SVC` directly and the
    primal weights are stored; otherwise the kernel matrix is computed
    here and `SVC` is trained in 'precomputed' mode.

    Parameters
    ----------
    dataset : CDataset
        Binary (2-classes) training set. Must be a :class:`.CDataset`
        instance with patterns data and corresponding labels.

    Returns
    -------
    SVC
        The underlying `SVC` object after fitting. Bias, weights and
        dual variables are stored on `self` as a side effect.

    """
    self.logger.info(
        "Training SVM with parameters: {:}".format(self.get_params()))

    # Setting up classifier parameters
    classifier = SVC(
        C=self.C,
        class_weight=self.class_weight,
        kernel='linear' if self.is_kernel_linear() else 'precomputed')

    # Training data: raw samples (linear) or the kernel matrix (otherwise)
    if self.is_kernel_linear():
        self._k = dataset.X
    else:
        self._k = CArray(self.kernel.k(dataset.X))

    classifier.fit(self._k.get_data(), dataset.Y.tondarray())

    # Intercept
    self._b = CArray(classifier.intercept_[0])[0]
    self.logger.debug("Classifier SVM bias: {:}".format(self._b))

    # Primal weights exist only for the linear kernel; None otherwise
    self._w = None
    if self.is_kernel_linear():
        coef = CArray(classifier.coef_, tosparse=dataset.issparse)
        self._w = CArray(coef.ravel())
        self.logger.debug(
            "Classifier SVM linear weights: \n{:}".format(self._w))

    if self.is_kernel_linear() and self.store_dual_vars is not True:
        # Linear SVM without forced storage: reset the dual parameters
        self._n_sv = self._sv_idx = self._alpha = self._sv = None
    else:
        # Dual Space SVM or forced dual variables store
        self._n_sv = CArray(classifier.n_support_)
        self._sv_idx = CArray(classifier.support_).ravel()
        # Compatibility fix for differences between sklearn versions:
        # the sign is taken from the labels, the magnitude from dual_coef_
        sv_sign = convert_binary_labels(dataset.Y[self.sv_idx])
        sv_magnitude = abs(
            CArray(classifier.dual_coef_).todense().ravel())
        self._alpha = sv_sign * sv_magnitude
        self._sv = CArray(dataset.X[self.sv_idx, :])
        self.logger.debug("Classifier SVM dual weights (alphas): "
                          "\n{:}".format(self._alpha))

    return classifier
def _fit(self, x, y):
    """Trains the SVM classifier on binary data.

    With a linear kernel the samples are given to `SVC` directly and the
    primal weights are stored; otherwise the kernel matrix is computed
    here and `SVC` is trained in 'precomputed' mode.

    Parameters
    ----------
    x : CArray
        Array to be used for training with shape (n_samples, n_features).
    y : CArray
        Array of shape (n_samples,) containing the class labels
        (2-classes only).

    Returns
    -------
    SVC
        The underlying `SVC` object after fitting. Bias, weights and
        dual variables are stored on `self` as a side effect.

    """
    self.logger.info(
        "Training SVM with parameters: {:}".format(self.get_params()))

    # Setting up classifier parameters
    classifier = SVC(
        C=self.C,
        class_weight=self.class_weight,
        kernel='linear' if self.is_kernel_linear() else 'precomputed')

    # Training data: raw samples (linear) or the kernel matrix (otherwise)
    if self.is_kernel_linear():
        self._k = x
    else:
        self._k = CArray(self.kernel.k(x))

    classifier.fit(self._k.get_data(), y.tondarray())

    # Intercept
    self._b = CArray(classifier.intercept_[0])[0]
    self.logger.debug("Classifier SVM bias: {:}".format(self._b))

    # Primal weights exist only for the linear kernel; None otherwise
    self._w = None
    if self.is_kernel_linear():
        coef = CArray(classifier.coef_, tosparse=x.issparse)
        self._w = CArray(coef.ravel())
        self.logger.debug(
            "Classifier SVM linear weights: \n{:}".format(self._w))

    if self.is_kernel_linear() and self.store_dual_vars is not True:
        # Linear SVM without forced storage: reset the dual parameters
        self._n_sv = self._sv_idx = self._alpha = self._sv = None
    else:
        # Dual Space SVM or forced dual variables store
        self._n_sv = CArray(classifier.n_support_)
        self._sv_idx = CArray(classifier.support_).ravel()
        # Compatibility fix for differences between sklearn versions:
        # the sign is taken from the labels, the magnitude from dual_coef_
        sv_sign = convert_binary_labels(y[self.sv_idx])
        sv_magnitude = abs(
            CArray(classifier.dual_coef_).todense().ravel())
        self._alpha = sv_sign * sv_magnitude
        self._sv = CArray(x[self.sv_idx, :])
        self.logger.debug("Classifier SVM dual weights (alphas): "
                          "\n{:}".format(self._alpha))

    return classifier
def _fit(self, x, y):
    """Trains the classifier with an iterative (sub)gradient procedure.

    Weights start at zero. At every outer iteration a sample subset is
    drawn, the gradients w.r.t. w and b are computed on it, and a set of
    step sizes is tried in turn; after each step the weights are clipped
    to the configured lower/upper bounds. Training stops when the
    objective changes by less than `self.eps` or after `self.max_it`
    iterations.

    Parameters
    ----------
    x : CArray
        Array to be used for training with shape (n_samples, n_features).
    y : CArray
        Array of shape (n_samples,) containing the class labels
        (2-classes only).

    Returns
    -------
    None
        The learned parameters are stored in `self._w` and `self._b`.

    Raises
    ------
    ValueError
        If `self.n_classes` is different from 2.

    """
    if self.n_classes != 2:
        raise ValueError(
            "Trying to learn an SVM on more/less than two classes.")

    y = convert_binary_labels(y)

    if self.class_weight == 'balanced':
        n_pos = y[y == 1].shape[0]
        n_neg = y[y == -1].shape[0]
        n_tot = n_pos + n_neg
        self.weight = CArray.zeros(2)
        self.weight[0] = 1.0 * n_pos / n_tot
        self.weight[1] = 1.0 * n_neg / n_tot

    self._w = CArray.zeros(x.shape[1])
    self._b = CArray(0.0)

    obj = self.objective(x, y)
    obj_new = obj

    for it in range(self.max_it):

        # pick a random sample subset (seeded by the iteration number)
        subset = CArray.randsample(
            CArray.arange(x.shape[0], dtype=int),
            x.shape[0], random_state=it)

        # compute subgradients
        grad_w, grad_b = self.gradient_w_b(x[subset, :], y[subset])

        # try the exponents 0, 10, ..., 70 for eta until the objective
        # drops below the value of the previous outer iteration
        for exponent in range(0, 71, 10):

            step = (self.eta ** exponent) * 2 ** (-0.01 * it) / \
                (x.shape[0] ** 0.5)

            self._w -= step * grad_w
            self._b -= step * grad_b

            # Applying UPPER bound
            upper_part = self.w[self._idx_ub]
            upper_part[upper_part > self._ub] = self._ub
            self.w[self._idx_ub] = upper_part

            # Applying LOWER bound
            lower_part = self.w[self._idx_lb]
            lower_part[lower_part < self._lb] = self._lb
            self.w[self._idx_lb] = lower_part

            obj_new = self.objective(x, y)

            if obj_new < obj:
                break

        if abs(obj_new - obj) < self.eps:
            # converged: leave the loop, weights are finalised below
            self.logger.info("i {:}: {:}".format(it, obj_new))
            break

        obj = obj_new

        if it % 10 == 0:
            loss = self.hinge_loss(x, y).sum()
            self.logger.info("i {:}: {:.4f}, L {:.4f}".format(
                it, obj, loss))

    # Sparse weights if input is sparse (like in CClassifierSVM)
    self._w = self.w.tosparse() if x.issparse else self.w