def _setup_svm(self, examples, classes, C):
    """
    Build the data for the standard soft-margin SVM dual QP:
        min (1/2) a'Ha + f'a   s.t.   A a = b,  lb <= a <= ub

    @param examples : instance matrix used to compute the kernel
    @param classes : column matrix of -1/+1 labels
    @param C : box-constraint parameter; a float for a uniform bound, or an
               array-like for per-variable upper bounds
    @return : (K, H, f, A, b, lb, ub) where K is the raw kernel matrix and
              H = D K D with D = diag(classes); K and H are None when no
              kernel is configured (caller supplies its own Hessian)
    """
    kernel = kernel_by_name(self.kernel, gamma=self.gamma, p=self.p)
    n = len(examples)
    e = np.matrix(np.ones((n, 1)))

    # Kernel and Hessian
    if kernel is None:
        K = None
        H = None
    else:
        K = _smart_kernel(kernel, examples)
        D = spdiag(classes)
        H = D * K * D

    # Term for -sum of alphas
    f = -e

    # Equality constraint: sum(y_i * alpha_i) = 0
    A = classes.T.astype(float)
    b = np.matrix([0.0])

    # Box constraints: 0 <= alpha_i <= C
    lb = np.matrix(np.zeros((n, 1)))
    # isinstance (rather than `type(C) == float`) also accepts float
    # subclasses such as np.float64 for the scalar case
    if isinstance(C, float):
        ub = C * e
    else:
        # Allow for C to be an array of per-variable bounds
        ub = C
    return K, H, f, A, b, lb, ub
def predict(self, bags):
    """
    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @return : an array of length n containing real-valued label predictions
              (threshold at zero to produce binary predictions)
    """
    if self._sv_bags is None or len(self._sv_bags) == 0:
        # No support vectors were retained by fit(); everything scores zero
        return np.zeros(len(bags))
    else:
        kernel = kernel_by_name(self.kernel, p=self.p, gamma=self.gamma)
        # list(...) is required: under Python 3, map() returns a one-shot
        # iterator, but the bag kernel needs a sequence it can len()/index
        # and traverse more than once
        K = kernel(list(map(np.asmatrix, bags)), self._sv_bags)
        return np.array(self._b + K * spdiag(self._sv_y)
                        * self._sv_alphas).reshape((-1,))
def predict(self, bags):
    """
    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @return : an array of length n containing real-valued label predictions
              (threshold at zero to produce binary predictions)
    """
    # With no retained support bags, every prediction is zero
    if self._sv_bags is None or len(self._sv_bags) == 0:
        return np.zeros(len(bags))

    kernel = kernel_by_name(self.kernel, p=self.p, gamma=self.gamma)
    bag_matrices = [np.asmatrix(bag) for bag in bags]
    # Bag-level kernel between the query bags and the support bags
    gram = kernel(bag_matrices, self._sv_bags)
    scores = self._b + gram * spdiag(self._sv_y) * self._sv_alphas
    return np.array(scores).reshape((-1,))
def predict(self, X):
    """
    @param X : an n-by-m array-like object containing n examples with m
               features
    @return : an array of length n containing real-valued label predictions
              (threshold at zero to produce binary predictions)
    """
    # With no retained support vectors, every prediction is zero
    if self._sv_X is None or len(self._sv_X) == 0:
        return np.zeros(len(X))

    kernel = kernel_by_name(self.kernel, p=self.p, gamma=self.gamma)
    # Kernel between the query examples and the support vectors
    gram = kernel(np.asmatrix(X), self._sv_X)
    scores = self._b + gram * spdiag(self._sv_y) * self._sv_alphas
    return np.array(scores).reshape((-1,))
def predict(self, bags):
    """
    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @return : an array of length n containing real-valued label predictions
              (threshold at zero to produce binary predictions)
    """
    # Model was never fit (no bias term): all predictions are zero
    if self._b is None:
        return np.zeros(len(bags))

    kernel = kernel_by_name(self.kernel, p=self.p, gamma=self.gamma)
    D = spdiag(self._y)
    # Row vector of dual weights; same left-to-right grouping as
    # alphas.T * D * V, hoisted out of the per-bag loop
    weights = self._alphas.T * D * self._V

    predictions = []
    for bag in bags:
        # A bag's score is the max instance-level response within it
        inst_scores = self._b + weights * kernel(self._X, np.asmatrix(bag))
        predictions.append(np.max(inst_scores))
    return np.array(predictions)
def fit(self, bags, y):
    """
    Fit a MISVM classifier via CCCP: alternately train an instance-level
    SVM and re-select one "witness" instance per positive bag.

    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @param y : an array-like object of length n containing -1/+1 labels
    """
    def transform(mx):
        """
        Transform into np.matrix if array/list
        ignore scipy.sparse matrix
        """
        if issparse(mx):
            return mx.todense()
        return np.asmatrix(mx)

    self._bags = [transform(bag) for bag in bags]
    y = np.asmatrix(y).reshape((-1, 1))
    bs = BagSplitter(self._bags, y)
    best_obj = float('inf')
    best_svm = None
    # One deterministic start plus self.restarts random restarts
    for rr in range(self.restarts + 1):
        if rr == 0:
            if self.verbose:
                print('Non-random start...')
            # Deterministic start: represent each positive bag by the
            # average of its instances
            pos_bag_avgs = np.vstack(
                [np.average(bag, axis=0) for bag in bs.pos_bags])
        else:
            if self.verbose:
                print('Random restart %d of %d...' % (rr, self.restarts))
            # Random start: a random convex combination of each
            # positive bag's instances
            pos_bag_avgs = np.vstack(
                [rand_convex(len(bag)) * bag for bag in bs.pos_bags])

        # Initial training set: all negative instances plus one synthetic
        # representative per positive bag
        intial_instances = np.vstack([bs.neg_instances, pos_bag_avgs])
        classes = np.vstack([-np.ones((bs.L_n, 1)), np.ones((bs.X_p, 1))])

        # Setup SVM and QP
        if self.scale_C:
            C = self.C / float(len(intial_instances))
        else:
            C = self.C
        setup = self._setup_svm(intial_instances, classes, C)
        K = setup[0]
        qp = IterativeQP(*setup[1:])

        # Fix Gx <= h: replace the default box constraints with
        # bag-level constraints on the negative instances
        neg_cons = spzeros(bs.X_n, bs.L_n)
        for b, (l, u) in enumerate(slices(bs.neg_groups)):
            neg_cons[b, l:u] = 1.0
        pos_cons = speye(bs.X_p)
        bot_left = spzeros(bs.X_p, bs.L_n)
        top_right = spzeros(bs.X_n, bs.X_p)
        half_cons = sparse([[neg_cons, bot_left], [top_right, pos_cons]])
        qp.G = sparse([-speye(bs.X_p + bs.L_n), half_cons])
        qp.h = cvxmat(
            np.vstack([
                np.zeros((bs.X_p + bs.L_n, 1)),
                C * np.ones((bs.X_p + bs.X_n, 1))
            ]))

        # Precompute kernel for all positive instances
        kernel = kernel_by_name(self.kernel, gamma=self.gamma, p=self.p)
        K_all = kernel(bs.instances, bs.instances)

        # Negative instances are always selected; only positive-bag
        # witnesses change between iterations
        neg_selectors = np.array(range(bs.L_n))

        class MISVMCCCP(CCCP):

            def bailout(cself, svm, selectors, instances, K):
                # On non-convergence, return the most recent SVM
                return svm

            def iterate(cself, svm, selectors, instances, K):
                cself.mention('Training SVM...')
                alphas, obj = qp.solve(cself.verbose)

                # Construct SVM from solution
                svm = SVM(kernel=self.kernel, gamma=self.gamma, p=self.p,
                          verbose=self.verbose, sv_cutoff=self.sv_cutoff)
                svm._X = instances
                svm._y = classes
                svm._alphas = alphas
                svm._objective = obj
                svm._compute_separator(K)
                svm._K = K

                cself.mention('Recomputing classes...')
                # Pick the highest-scoring instance of each positive bag
                # as its new witness
                p_confs = svm.predict(bs.pos_instances)
                pos_selectors = bs.L_n + np.array([
                    l + np.argmax(p_confs[l:u])
                    for l, u in slices(bs.pos_groups)
                ])
                new_selectors = np.hstack([neg_selectors, pos_selectors])

                if selectors is None:
                    sel_diff = len(new_selectors)
                else:
                    sel_diff = np.nonzero(new_selectors - selectors)[0].size
                cself.mention('Selector differences: %d' % sel_diff)

                if sel_diff == 0:
                    # Selection is stable: converged
                    return None, svm
                elif sel_diff > 5:
                    # Clear results to avoid a
                    # bad starting point in
                    # the next iteration
                    qp.clear_results()

                cself.mention('Updating QP...')
                # Restrict the precomputed kernel to the selected rows
                # and columns
                indices = (new_selectors, )
                K = K_all[indices].T[indices].T
                D = spdiag(classes)
                qp.update_H(D * K * D)
                return {
                    'svm': svm,
                    'selectors': new_selectors,
                    'instances': bs.instances[indices],
                    'K': K
                }, None

        cccp = MISVMCCCP(verbose=self.verbose, svm=None, selectors=None,
                         instances=intial_instances, K=K,
                         max_iters=self.max_iters)
        svm = cccp.solve()
        if svm is not None:
            obj = float(svm._objective)
            if obj < best_obj:
                best_svm = svm
                best_obj = obj

    # Keep the restart with the lowest objective value
    if best_svm is not None:
        self._X = best_svm._X
        self._y = best_svm._y
        self._alphas = best_svm._alphas
        self._objective = best_svm._objective
        self._compute_separator(best_svm._K)
def fit(self, bags, y):
    """
    Fit a missSVM-style classifier via CCCP, linearizing the softmin
    constraints on the positive bags at each iteration.

    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @param y : an array-like object of length n containing -1/+1 labels
    """
    self._bags = list(map(np.asmatrix, bags))
    bs = BagSplitter(self._bags, np.asmatrix(y).reshape((-1, 1)))
    # Positive instances appear three times: once per role they play in
    # the stacked constraint system (bag-level plus two instance-level)
    self._X = np.vstack([
        bs.pos_instances, bs.pos_instances, bs.pos_instances,
        bs.neg_instances
    ])
    self._y = np.vstack([
        np.matrix(np.ones((bs.X_p + bs.L_p, 1))),
        -np.matrix(np.ones((bs.L_p + bs.L_n, 1)))
    ])
    if self.scale_C:
        C = self.C / float(len(self._bags))
    else:
        C = self.C

    # Setup SVM and adjust constraints; only the linear QP pieces are
    # reused (the Hessian is rebuilt below from V and K)
    _, _, f, A, b, lb, ub = self._setup_svm(self._y, self._y, C)
    # Reweight the box constraints so positive bag/instance terms are
    # balanced against the L_n negative instances
    ub[:bs.X_p] *= (float(bs.L_n) / float(bs.X_p))
    ub[bs.X_p:bs.X_p + 2 * bs.L_p] *= (float(bs.L_n) / float(bs.L_p))
    K = kernel_by_name(self.kernel, gamma=self.gamma, p=self.p)(self._X,
                                                                self._X)
    D = spdiag(self._y)
    # Initial upper bounds: halve the positive-instance entries
    ub0 = np.matrix(ub)
    ub0[bs.X_p:bs.X_p + 2 * bs.L_p] *= 0.5

    def get_V(pos_classifications):
        # Selector matrix: softmin gradients over each positive bag's
        # classifications in the top-left block, identity elsewhere
        eye_n = bs.L_n + 2 * bs.L_p
        top = np.zeros((bs.X_p, bs.L_p))
        for row, (i, j) in enumerate(slices(bs.pos_groups)):
            top[row, i:j] = _grad_softmin(-pos_classifications[i:j],
                                          self.alpha).flat
        return sp.bmat([[sp.coo_matrix(top), None],
                        [None, sp.eye(eye_n, eye_n)]])

    # Initial linearization at zero classifications
    V0 = get_V(np.matrix(np.zeros((bs.L_p, 1))))
    qp = IterativeQP(D * V0 * K * V0.T * D, f, A, b, lb, ub0)

    best_obj = float('inf')
    best_svm = None
    # One deterministic start plus self.restarts random restarts
    for rr in range(self.restarts + 1):
        if rr == 0:
            if self.verbose:
                print('Non-random start...')
            # Train on instances
            alphas, obj = qp.solve(self.verbose)
        else:
            if self.verbose:
                print('Random restart %d of %d...' % (rr, self.restarts))
            # Random initial dual variables; objective is a placeholder
            alphas = np.matrix([uniform(0.0, 1.0)
                                for i in range(len(lb))]).T
            obj = Objective(0.0, 0.0)
        svm = MICA(kernel=self.kernel, gamma=self.gamma, p=self.p,
                   verbose=self.verbose, sv_cutoff=self.sv_cutoff)
        svm._X = self._X
        svm._y = self._y
        svm._V = V0
        svm._alphas = alphas
        svm._objective = obj
        svm._compute_separator(K)
        svm._K = K

        class missCCCP(CCCP):

            def bailout(cself, svm, obj_val):
                # On non-convergence, return the most recent SVM
                return svm

            def iterate(cself, svm, obj_val):
                cself.mention('Linearizing constraints...')
                classifications = svm._predictions[bs.X_p:bs.X_p + bs.L_p]
                V = get_V(classifications)

                cself.mention('Computing slacks...')
                # Difference is [1 - y_i*(w*phi(x_i) + b)]
                pos_differences = 1.0 - classifications
                neg_differences = 1.0 + classifications
                # Slacks are positive differences only
                pos_slacks = np.multiply(pos_differences > 0,
                                         pos_differences)
                neg_slacks = np.multiply(neg_differences > 0,
                                         neg_differences)
                all_slacks = np.hstack([pos_slacks, neg_slacks])

                cself.mention('Linearizing...')
                # Compute gradient across pairs
                slack_grads = np.vstack([
                    _grad_softmin(pair, self.alpha) for pair in all_slacks
                ])
                # Stack results into one column
                slack_grads = np.vstack([
                    np.ones((bs.X_p, 1)), slack_grads[:, 0],
                    slack_grads[:, 1],
                    np.ones((bs.L_n, 1))
                ])
                # Update QP
                qp.update_H(D * V * K * V.T * D)
                qp.update_ub(np.multiply(ub, slack_grads))
                # Re-solve
                cself.mention('Solving QP...')
                alphas, obj = qp.solve(self.verbose)
                new_svm = MICA(kernel=self.kernel, gamma=self.gamma,
                               p=self.p, verbose=self.verbose,
                               sv_cutoff=self.sv_cutoff)
                new_svm._X = self._X
                new_svm._y = self._y
                new_svm._V = V
                new_svm._alphas = alphas
                new_svm._objective = obj
                new_svm._compute_separator(K)
                new_svm._K = K
                if cself.check_tolerance(obj_val, obj):
                    # Objective has stopped improving: converged
                    return None, new_svm
                return {'svm': new_svm, 'obj_val': obj}, None

        cccp = missCCCP(verbose=self.verbose, svm=svm, obj_val=None,
                        max_iters=self.max_iters)
        svm = cccp.solve()
        if svm is not None:
            obj = float(svm._objective)
            if obj < best_obj:
                best_svm = svm
                best_obj = obj

    # Keep the restart with the lowest objective value
    if best_svm is not None:
        self._V = best_svm._V
        self._alphas = best_svm._alphas
        self._objective = best_svm._objective
        self._compute_separator(best_svm._K)
        self._bag_predictions = self.predict(self._bags)
def fit(self, bags, y):
    """
    Fit a missSVM-style classifier via CCCP, linearizing the softmin
    constraints on the positive bags at each iteration.

    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @param y : an array-like object of length n containing -1/+1 labels
    """
    # FIX: materialize the bags as a list. A bare map() is a one-shot
    # iterator under Python 3: BagSplitter would consume it, after which
    # len(self._bags) raises TypeError and self.predict(self._bags) at
    # the end would see an exhausted iterator.
    self._bags = list(map(np.asmatrix, bags))
    bs = BagSplitter(self._bags, np.asmatrix(y).reshape((-1, 1)))
    # Positive instances appear three times: once per role they play in
    # the stacked constraint system (bag-level plus two instance-level)
    self._X = np.vstack([bs.pos_instances,
                         bs.pos_instances,
                         bs.pos_instances,
                         bs.neg_instances])
    self._y = np.vstack([np.matrix(np.ones((bs.X_p + bs.L_p, 1))),
                         -np.matrix(np.ones((bs.L_p + bs.L_n, 1)))])
    if self.scale_C:
        C = self.C / float(len(self._bags))
    else:
        C = self.C

    # Setup SVM and adjust constraints; only the linear QP pieces are
    # reused (the Hessian is rebuilt below from V and K)
    _, _, f, A, b, lb, ub = self._setup_svm(self._y, self._y, C)
    # Reweight the box constraints so positive bag/instance terms are
    # balanced against the L_n negative instances
    ub[:bs.X_p] *= (float(bs.L_n) / float(bs.X_p))
    ub[bs.X_p: bs.X_p + 2 * bs.L_p] *= (float(bs.L_n) / float(bs.L_p))
    K = kernel_by_name(self.kernel, gamma=self.gamma, p=self.p)(self._X,
                                                                self._X)
    D = spdiag(self._y)
    # Initial upper bounds: halve the positive-instance entries
    ub0 = np.matrix(ub)
    ub0[bs.X_p: bs.X_p + 2 * bs.L_p] *= 0.5

    def get_V(pos_classifications):
        # Selector matrix: softmin gradients over each positive bag's
        # classifications in the top-left block, identity elsewhere
        eye_n = bs.L_n + 2 * bs.L_p
        top = np.zeros((bs.X_p, bs.L_p))
        for row, (i, j) in enumerate(slices(bs.pos_groups)):
            top[row, i:j] = _grad_softmin(-pos_classifications[i:j],
                                          self.alpha).flat
        return sp.bmat([[sp.coo_matrix(top), None],
                        [None, sp.eye(eye_n, eye_n)]])

    # Initial linearization at zero classifications
    V0 = get_V(np.matrix(np.zeros((bs.L_p, 1))))
    qp = IterativeQP(D * V0 * K * V0.T * D, f, A, b, lb, ub0)

    best_obj = float('inf')
    best_svm = None
    # One deterministic start plus self.restarts random restarts
    for rr in range(self.restarts + 1):
        if rr == 0:
            if self.verbose:
                print('Non-random start...')
            # Train on instances
            alphas, obj = qp.solve(self.verbose)
        else:
            if self.verbose:
                print('Random restart %d of %d...' % (rr, self.restarts))
            # Random initial dual variables; objective is a placeholder
            alphas = np.matrix([uniform(0.0, 1.0)
                                for i in range(len(lb))]).T
            obj = Objective(0.0, 0.0)
        svm = MICA(kernel=self.kernel, gamma=self.gamma, p=self.p,
                   verbose=self.verbose, sv_cutoff=self.sv_cutoff)
        svm._X = self._X
        svm._y = self._y
        svm._V = V0
        svm._alphas = alphas
        svm._objective = obj
        svm._compute_separator(K)
        svm._K = K

        class missCCCP(CCCP):

            def bailout(cself, svm, obj_val):
                # On non-convergence, return the most recent SVM
                return svm

            def iterate(cself, svm, obj_val):
                cself.mention('Linearizing constraints...')
                classifications = svm._predictions[bs.X_p:
                                                   bs.X_p + bs.L_p]
                V = get_V(classifications)

                cself.mention('Computing slacks...')
                # Difference is [1 - y_i*(w*phi(x_i) + b)]
                pos_differences = 1.0 - classifications
                neg_differences = 1.0 + classifications
                # Slacks are positive differences only
                pos_slacks = np.multiply(pos_differences > 0,
                                         pos_differences)
                neg_slacks = np.multiply(neg_differences > 0,
                                         neg_differences)
                all_slacks = np.hstack([pos_slacks, neg_slacks])

                cself.mention('Linearizing...')
                # Compute gradient across pairs
                slack_grads = np.vstack([_grad_softmin(pair, self.alpha)
                                         for pair in all_slacks])
                # Stack results into one column
                slack_grads = np.vstack([np.ones((bs.X_p, 1)),
                                         slack_grads[:, 0],
                                         slack_grads[:, 1],
                                         np.ones((bs.L_n, 1))])
                # Update QP
                qp.update_H(D * V * K * V.T * D)
                qp.update_ub(np.multiply(ub, slack_grads))
                # Re-solve
                cself.mention('Solving QP...')
                alphas, obj = qp.solve(self.verbose)
                new_svm = MICA(kernel=self.kernel, gamma=self.gamma,
                               p=self.p, verbose=self.verbose,
                               sv_cutoff=self.sv_cutoff)
                new_svm._X = self._X
                new_svm._y = self._y
                new_svm._V = V
                new_svm._alphas = alphas
                new_svm._objective = obj
                new_svm._compute_separator(K)
                new_svm._K = K
                if cself.check_tolerance(obj_val, obj):
                    # Objective has stopped improving: converged
                    return None, new_svm
                return {'svm': new_svm, 'obj_val': obj}, None

        cccp = missCCCP(verbose=self.verbose, svm=svm, obj_val=None,
                        max_iters=self.max_iters)
        svm = cccp.solve()
        if svm is not None:
            obj = float(svm._objective)
            if obj < best_obj:
                best_svm = svm
                best_obj = obj

    # Keep the restart with the lowest objective value
    if best_svm is not None:
        self._V = best_svm._V
        self._alphas = best_svm._alphas
        self._objective = best_svm._objective
        self._compute_separator(best_svm._K)
        self._bag_predictions = self.predict(self._bags)
def fit(self, bags, y):
    """
    Fit a MICA classifier via CCCP, alternating between a QP over the
    dual variables and an LP over the per-bag convex-combination
    weights (upsilon).

    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @param y : an array-like object of length n containing -1/+1 labels
    """
    # FIX: materialize the bags as a list. A bare map() is a one-shot
    # iterator under Python 3: BagSplitter would consume it, after which
    # len(self._bags) raises TypeError and self.predict(self._bags) at
    # the end would see an exhausted iterator.
    self._bags = [np.asmatrix(bag) for bag in bags]
    bs = BagSplitter(self._bags, np.asmatrix(y).reshape((-1, 1)))
    self._X = bs.instances
    Ln = bs.L_n
    Lp = bs.L_p
    Xp = bs.X_p
    m = Ln + Xp
    if self.scale_C:
        C = self.C / float(len(self._bags))
    else:
        C = self.C
    K = kernel_by_name(self.kernel, gamma=self.gamma, p=self.p)(self._X,
                                                                self._X)
    new_classes = np.matrix(np.vstack([-np.ones((Ln, 1)),
                                       np.ones((Xp, 1))]))
    self._y = new_classes
    D = spdiag(new_classes)
    # Reuse the linear QP pieces from _setup_svm; the Hessian slot is
    # replaced with a placeholder and rebuilt per iteration from V and K
    setup = list(self._setup_svm(new_classes, new_classes, C))[1:]
    setup[0] = np.matrix([0])
    qp = IterativeQP(*setup)

    # LP data: minimize total slack subject to convex-combination
    # constraints on each positive bag's upsilon weights
    c = cvxmat(np.hstack([np.zeros(Lp + 1), np.ones(Xp + Ln)]))
    b = cvxmat(np.ones((Xp, 1)))
    A = spz(Xp, Lp + 1 + Xp + Ln)
    for row, (i, j) in enumerate(slices(bs.pos_groups)):
        A[row, i:j] = 1.0
    bottom_left = sparse(t([[-spI(Lp), spz(Lp)],
                            [spz(m, Lp), spz(m)]]))
    bottom_right = sparse([spz(Lp, m), -spI(m)])
    inst_cons = sparse(t([[spz(Xp, Lp), -spo(Xp)],
                          [spz(Ln, Lp), spo(Ln)]]))
    G = sparse(t([[inst_cons, -spI(m)],
                  [bottom_left, bottom_right]]))
    h = cvxmat(np.vstack([-np.ones((Xp, 1)),
                          np.zeros((Ln + Lp + m, 1))]))

    def to_V(upsilon):
        # Selector matrix: identity over negative instances, per-bag
        # upsilon weights over positive instances
        bot = np.zeros((Xp, Lp))
        for row, (i, j) in enumerate(slices(bs.pos_groups)):
            bot[row, i:j] = upsilon.flat[i:j]
        return sp.bmat([[sp.eye(Ln, Ln), None],
                        [None, sp.coo_matrix(bot)]])

    class MICACCCP(CCCP):

        def bailout(cself, alphas, upsilon, svm):
            # On non-convergence, return the most recent SVM
            return svm

        def iterate(cself, alphas, upsilon, svm):
            V = to_V(upsilon)
            cself.mention('Update QP...')
            qp.update_H(D * V * K * V.T * D)
            cself.mention('Solve QP...')
            alphas, obj = qp.solve(self.verbose)
            svm = MICA(kernel=self.kernel, gamma=self.gamma, p=self.p,
                       verbose=self.verbose, sv_cutoff=self.sv_cutoff)
            svm._X = self._X
            svm._y = self._y
            svm._V = V
            svm._alphas = alphas
            svm._objective = obj
            svm._compute_separator(K)
            svm._K = K

            cself.mention('Update LP...')
            # Refresh the LP with the new instance dot products
            for row, (i, j) in enumerate(slices(bs.pos_groups)):
                G[row, i:j] = cvxmat(-svm._dotprods[Ln + i: Ln + j].T)
            h[Xp: Xp + Ln] = cvxmat(-(1 + svm._dotprods[:Ln]))
            cself.mention('Solve LP...')
            sol, _ = linprog(c, G, h, A, b, verbose=self.verbose)
            new_upsilon = sol[:Lp]
            if cself.check_tolerance(np.linalg.norm(upsilon -
                                                    new_upsilon)):
                # Upsilon has stopped moving: converged
                return None, svm
            return {'alphas': alphas,
                    'upsilon': new_upsilon,
                    'svm': svm}, None

    best_obj = float('inf')
    best_svm = None
    # One deterministic start plus self.restarts random restarts
    for rr in range(self.restarts + 1):
        if rr == 0:
            if self.verbose:
                print('Non-random start...')
            # Deterministic start: uniform weights within each bag
            upsilon0 = np.matrix(np.vstack([np.ones((size, 1)) /
                                            float(size)
                                            for size in bs.pos_groups]))
        else:
            if self.verbose:
                print('Random restart %d of %d...' % (rr, self.restarts))
            # Random start: random convex weights within each bag
            upsilon0 = np.matrix(np.vstack([rand_convex(size).T
                                            for size in bs.pos_groups]))
        cccp = MICACCCP(verbose=self.verbose, alphas=None,
                        upsilon=upsilon0, svm=None,
                        max_iters=self.max_iters)
        svm = cccp.solve()
        if svm is not None:
            obj = float(svm._objective)
            if obj < best_obj:
                best_svm = svm
                best_obj = obj

    # Keep the restart with the lowest objective value
    if best_svm is not None:
        self._V = best_svm._V
        self._alphas = best_svm._alphas
        self._objective = best_svm._objective
        self._compute_separator(best_svm._K)
        self._bag_predictions = self.predict(self._bags)