def mv(v):
    # Ridge normal-equations operator for the primal case:
    # applies the sampled Kronecker feature map, then its transpose,
    # and adds the regularization term regparam * v.
    forward = sampled_kronecker_products.sampled_vec_trick(
        v, X2, X1, self.input2_inds, self.input1_inds)
    backward = sampled_kronecker_products.sampled_vec_trick(
        forward, X2.T, X1.T, None, None, self.input2_inds, self.input1_inds)
    return backward + regparam * v
def gradient(v, X1, X2, Y, rowind, colind, lamb):
    """Gradient of the L2-regularized squared hinge loss at v.

    Only the pairs with positive margin slack (the current support
    vectors) contribute to the data term; the ridge term lamb * v is
    always added.
    """
    preds = sampled_kronecker_products.sampled_vec_trick(v, X2, X1, colind, rowind)
    slack = 1. - Y * preds
    slack = np.where(slack > 0, slack, 0)
    support = np.nonzero(slack)[0]
    sup_rows = rowind[support]
    sup_cols = colind[support]
    # Back-project -Y and the predictions on the support set separately.
    neg_part = - sampled_kronecker_products.sampled_vec_trick(
        Y[support], X2.T, X1.T, None, None, sup_cols, sup_rows)
    pos_part = sampled_kronecker_products.sampled_vec_trick(
        preds[support], X2.T, X1.T, None, None, sup_cols, sup_rows)
    return neg_part + pos_part + lamb * v
def hessian(v, p, X1, X2, Y, rowind, colind, lamb):
    """Hessian-vector product of the squared hinge objective at v, applied to p.

    The data term is restricted to the support vectors determined by the
    current iterate v; the ridge contribution lamb * p is always added.
    """
    preds = sampled_kronecker_products.sampled_vec_trick(v, X2, X1, colind, rowind)
    slack = 1. - Y * preds
    slack = np.where(slack > 0, slack, 0)
    support = np.nonzero(slack)[0]
    sup_rows = rowind[support]
    sup_cols = colind[support]
    # Forward map restricted to the support set, then its transpose.
    Hp = sampled_kronecker_products.sampled_vec_trick(p, X2, X1, sup_cols, sup_rows)
    Hp = sampled_kronecker_products.sampled_vec_trick(
        Hp, X2.T, X1.T, None, None, sup_cols, sup_rows)
    return Hp + lamb * p
def mv_mk(v):
    # Multiple-kernel primal operator: regparam * v plus, for each pair
    # of data matrices (X1[i], X2[i]), the forward sampled Kronecker map
    # followed by its transpose.
    acc = regparam * v
    for i in range(len(X1)):
        projected = sampled_kronecker_products.sampled_vec_trick(
            v, X2[i], X1[i], self.input2_inds, self.input1_inds)
        acc = acc + sampled_kronecker_products.sampled_vec_trick(
            projected, X2[i].T, X1[i].T, None, None,
            self.input2_inds, self.input1_inds)
    return acc
def dual_svm_objective(a, K1, K2, Y, rowind, colind, lamb):
    """Dual-form objective of the pairwise support vector machine.

    Parameters
    ----------
    a : current dual solution
    K1 : samples x samples kernel matrix for domain 1
    K2 : samples x samples kernel matrix for domain 2
    Y : labels of the training pairs
    rowind : row indices for training pairs
    colind : column indices for training pairs
    lamb : regularization parameter
    """
    # K a over the sampled pairs serves both as the prediction vector P
    # and as the quadratic-form factor Ka; the original issued the exact
    # same sampled_vec_trick call twice — compute it once and reuse.
    Ka = sampled_kronecker_products.sampled_vec_trick(a, K2, K1, colind, rowind, colind, rowind)
    P = Ka
    z = (1. - Y*P)
    z = np.where(z>0, z, 0)
    return 0.5*(np.dot(z,z)+lamb*np.dot(a, Ka))
def func(v, X1, X2, Y, rowind, colind, lamb):
    """Primal objective: 0.5 * (squared hinge loss + lamb * ||v||^2)."""
    preds = sampled_kronecker_products.sampled_vec_trick(v, X2, X1, colind, rowind)
    slack = 1. - Y * preds
    slack = np.where(slack > 0, slack, 0)
    return 0.5 * (np.dot(slack, slack) + lamb * np.dot(v, v))
def mv(v):
    # Matvec restricted to the current support vectors sv (closure
    # variable): entries outside sv stay zero before the ridge term.
    sv_rows = rowind[sv]
    sv_cols = colind[sv]
    out = np.zeros(len(rowind))
    out[sv] = sampled_kronecker_products.sampled_vec_trick(
        v, K2, K1, sv_cols, sv_rows, colind, rowind)
    return out + lamb * v
def verbosity_wrapper(v, M, N, row_inds_N = None, row_inds_M = None, col_inds_N = None, col_inds_M = None):
    """Debug wrapper around sampled_vec_trick.

    Prints which cost branch the underlying routine will take (based on
    the operand shapes and index lengths) plus which index arguments are
    None, then delegates unchanged.
    """
    rc_m, cc_m = M.shape
    rc_n, cc_n = N.shape
    u_len = rc_m * rc_n if row_inds_N is None else len(row_inds_N)
    v_len = cc_m * cc_n if col_inds_N is None else len(col_inds_N)
    lhs = rc_m * v_len + cc_n * u_len
    rhs = rc_n * v_len + cc_m * u_len
    msgs = []
    if lhs < rhs:
        msgs.append('rc_m * v_len + cc_n * u_len < rc_n * v_len + cc_m * u_len')
    else:
        msgs.append('rc_m * v_len + cc_n * u_len >= rc_n * v_len + cc_m * u_len')
    # The None checks were duplicated verbatim in both branches of the
    # original; hoisting them preserves the exact printed output.
    if col_inds_N is None:
        msgs.append('col_inds_N is None')
    if row_inds_N is None:
        msgs.append('row_inds_N is None')
    print('\n'.join(msgs) + '\n')
    return sampled_kronecker_products.sampled_vec_trick(v, M, N, row_inds_N, row_inds_M, col_inds_N, col_inds_M)
def mv_mk(v):
    # Weighted multiple-kernel operator: regparam * v plus the weighted
    # sum of each sampled Kronecker kernel applied to v.
    result = regparam * v
    for i in range(len(K1)):
        contribution = sampled_kronecker_products.sampled_vec_trick(
            v, K2[i], K1[i], self.input2_inds, self.input1_inds,
            self.input2_inds, self.input1_inds)
        result = result + weights[i] * contribution
    return result
def cgcb(v):
    # Iteration callback for the dual (kernel) solver: optionally tracks
    # the regularized risk and keeps the best iterate seen so far in
    # self.A; otherwise just records the latest iterate.
    if self.compute_risk:
        # K a over the sampled pairs serves both as the predictions P and
        # as the quadratic-form factor Ka; the original issued the exact
        # same sampled_vec_trick call twice — compute it once and reuse.
        Ka = sampled_kronecker_products.sampled_vec_trick(
            v, K2, K1, self.input2_inds, self.input1_inds,
            self.input2_inds, self.input1_inds)
        P = Ka
        z = (Y - P)
        loss = (np.dot(z, z) + regparam * np.dot(v, Ka))
        print("loss", 0.5 * loss)
        if loss < self.bestloss:
            self.A = v.copy()
            self.bestloss = loss
    else:
        self.A = v
    if not self.callbackfun is None:
        self.predictor = KernelPairwisePredictor(
            self.A, self.input1_inds, self.input2_inds)
        self.callbackfun.callback(self)
def inner_predict(K1pred, K2pred, row_inds_K1pred=None, row_inds_K2pred=None):
    """Compute pairwise predictions from the learned dual coefficients self.A.

    1-D kernel inputs are promoted to single-row matrices. When prediction
    row indices are supplied, both index vectors are cast to int32 and a
    prediction is produced per index pair; otherwise predictions cover all
    pair combinations.
    """
    if len(K1pred.shape) == 1:
        K1pred = K1pred.reshape(1, K1pred.shape[0])
    if len(K2pred.shape) == 1:
        K2pred = K2pred.reshape(1, K2pred.shape[0])
    if row_inds_K1pred is None:
        pred_rows1 = None
        pred_rows2 = None
    else:
        pred_rows1 = np.array(row_inds_K1pred, dtype=np.int32)
        pred_rows2 = np.array(row_inds_K2pred, dtype=np.int32)
    P = sampled_kronecker_products.sampled_vec_trick(
        self.A, K2pred, K1pred, pred_rows2, pred_rows1,
        self.row_inds_K2training, self.row_inds_K1training)
    return np.array(P)
def cgcb(v):
    # Iteration callback for the primal (linear) solver: optionally
    # tracks the regularized risk, keeping the best weight matrix seen so
    # far; otherwise stores the latest iterate reshaped to (d1, d2).
    if self.compute_risk:
        P = sampled_kronecker_products.sampled_vec_trick(
            v, X2, X1, self.input2_inds, self.input1_inds)
        residual = Y - P
        loss = np.dot(residual, residual) + regparam * np.dot(v, v)
        if loss < self.bestloss:
            self.W = v.copy().reshape((x1fsize, x2fsize), order='F')
            self.bestloss = loss
    else:
        self.W = v.reshape((x1fsize, x2fsize), order='F')
    if self.callbackfun is not None:
        self.predictor = LinearPairwisePredictor(self.W)
        self.callbackfun.callback(self)
def predict(self, X1pred, X2pred, row_inds_X1pred=None, row_inds_X2pred=None):
    """Computes predictions for test examples.

    Parameters
    ----------
    X1pred : array-like, shape = [n_samples1, n_features1]
        the first part of the test data matrix
    X2pred : array-like, shape = [n_samples2, n_features2]
        the second part of the test data matrix
    row_inds_X1pred : list of indices, shape = [n_test_pairs], optional
        maps rows of X1pred to vector of predictions P. If not supplied,
        predictions are computed for all possible test pair combinations.
    row_inds_X2pred : list of indices, shape = [n_test_pairs], optional
        maps rows of X2pred to vector of predictions P. If not supplied,
        predictions are computed for all possible test pair combinations.

    Returns
    ----------
    P : array, shape = [n_test_pairs] or [n_samples1*n_samples2]
        predictions, either ordered according to the supplied row indices,
        or if no such are supplied by default prediction for
        (X1[i], X2[j]) maps to P[i + j*n_samples1].
    """
    # Promote 1-D inputs: treat as a single sample when the feature count
    # allows it, otherwise as a column of single-feature samples.
    if len(X1pred.shape) == 1:
        X1pred = X1pred[np.newaxis, ...] if self.W.shape[0] > 1 else X1pred[..., np.newaxis]
    if len(X2pred.shape) == 1:
        X2pred = X2pred[np.newaxis, ...] if self.W.shape[1] > 1 else X2pred[..., np.newaxis]
    if row_inds_X1pred is None:
        # Dense case: all pair combinations via X1 W X2^T.
        P = np.dot(np.dot(X1pred, self.W), X2pred.T)
    else:
        # Sparse case: only the requested index pairs.
        w_flat = self.W.reshape((self.W.shape[0] * self.W.shape[1]), order='F')
        P = sampled_kronecker_products.sampled_vec_trick(
            w_flat, X2pred, X1pred,
            np.array(row_inds_X2pred, dtype=np.int32),
            np.array(row_inds_X1pred, dtype=np.int32),
            None, None)
    return P.ravel(order='F')
def __init__(self, **kwargs):
    """Train a pairwise (Kronecker) SVM with a truncated Newton scheme.

    Runs the primal formulation (explicit data matrices 'X1'/'X2') when
    no 'K1' is supplied in kwargs, otherwise the dual formulation with
    kernel matrices 'K1'/'K2'. Optimization alternates a gradient
    computation with an inner QMR solve per outer iteration.
    """
    self.resource_pool = kwargs
    Y = kwargs[TRAIN_LABELS]
    # Indices mapping each training pair to a row of X1/K1 and X2/K2.
    self.label_row_inds = np.array(kwargs["label_row_inds"], dtype = np.int32)
    self.label_col_inds = np.array(kwargs["label_col_inds"], dtype = np.int32)
    self.Y = Y
    self.trained = False
    if "regparam" in kwargs:
        self.regparam = kwargs["regparam"]
    else:
        self.regparam = 1.0
    if CALLBACK_FUNCTION in kwargs:
        self.callbackfun = kwargs[CALLBACK_FUNCTION]
    else:
        self.callbackfun = None
    if "compute_risk" in kwargs:
        self.compute_risk = kwargs["compute_risk"]
    else:
        self.compute_risk = False
    regparam = self.regparam
    if not 'K1' in self.resource_pool:
        # ---------- primal case: explicit data matrices ----------
        self.regparam = regparam  # NOTE(review): redundant re-assignment
        X1 = self.resource_pool['X1']
        X2 = self.resource_pool['X2']
        self.X1, self.X2 = X1, X2
        if 'maxiter' in self.resource_pool:
            maxiter = int(self.resource_pool['maxiter'])
        else:
            maxiter = 1000
        if 'inneriter' in self.resource_pool:
            inneriter = int(self.resource_pool['inneriter'])
        else:
            inneriter = 50
        x1tsize, x1fsize = X1.shape #m, d
        x2tsize, x2fsize = X2.shape #q, r
        label_row_inds = np.array(self.label_row_inds, dtype = np.int32)
        label_col_inds = np.array(self.label_col_inds, dtype = np.int32)
        Y = self.Y
        rowind = label_row_inds
        colind = label_col_inds
        lamb = self.regparam
        rowind = np.array(rowind, dtype = np.int32)
        colind = np.array(colind, dtype = np.int32)
        # Primal weight vector lives in the joint feature space d * r.
        fdim = X1.shape[1]*X2.shape[1]
        w = np.zeros(fdim)
        self.bestloss = float("inf")
        def mv(v):
            # Hessian-vector product at the current iterate w (closure).
            return hessian(w, v, X1, X2, Y, rowind, colind, lamb)
        for i in range(maxiter):
            g = gradient(w, X1, X2, Y, rowind, colind, lamb)
            G = LinearOperator((fdim, fdim), matvec=mv, rmatvec=mv, dtype=np.float64)
            self.best_residual = float("inf")
            # Inner solve of the Newton system H * w_new = g via QMR.
            self.w_new = qmr(G, g, tol=1e-10, maxiter=inneriter)[0]
            # Stop when the step no longer changes w.
            if np.all(w == w - self.w_new):
                break
            w = w - self.w_new
            if self.compute_risk:
                # NOTE(review): argument order (X1, X2, rowind, colind) is
                # the reverse of every other call site in this file, which
                # uses (X2, X1, colind, rowind) — confirm intentional.
                P = sampled_kronecker_products.sampled_vec_trick(w, X1, X2, rowind, colind)
                z = (1. - Y*P)
                z = np.where(z>0, z, 0)
                loss = 0.5*(np.dot(z,z)+lamb*np.dot(w,w))
                if loss < self.bestloss:
                    self.W = w.reshape((x1fsize, x2fsize), order='F')
                    self.bestloss = loss
            else:
                self.W = w.reshape((x1fsize, x2fsize), order='F')
            if self.callbackfun is not None:
                self.callbackfun.callback(self)
        self.predictor = LinearPairwisePredictor(self.W)
    else:
        # ---------- dual case: kernel matrices ----------
        K1 = self.resource_pool['K1']
        K2 = self.resource_pool['K2']
        if 'maxiter' in self.resource_pool:
            maxiter = int(self.resource_pool['maxiter'])
        else:
            maxiter = 100
        if 'inneriter' in self.resource_pool:
            inneriter = int(self.resource_pool['inneriter'])
        else:
            inneriter = 1000
        label_row_inds = np.array(self.label_row_inds, dtype = np.int32)
        label_col_inds = np.array(self.label_col_inds, dtype = np.int32)
        Y = self.Y
        rowind = label_row_inds
        colind = label_col_inds
        lamb = self.regparam
        rowind = np.array(rowind, dtype = np.int32)
        colind = np.array(colind, dtype = np.int32)
        # Dual coefficient vector: one entry per labeled pair.
        ddim = len(rowind)
        a = np.zeros(ddim)
        self.bestloss = float("inf")
        def func(a):
            # Dual objective value at a (used only for risk tracking).
            P = sampled_kronecker_products.sampled_vec_trick(a, K2, K1, colind, rowind, colind, rowind)
            z = (1. - Y*P)
            z = np.where(z>0, z, 0)
            Ka = sampled_kronecker_products.sampled_vec_trick(a, K2, K1, colind, rowind, colind, rowind)
            return 0.5*(np.dot(z,z)+lamb*np.dot(a, Ka))
        def mv(v):
            # Forward matvec restricted to current support vectors sv
            # (closure variable set in the loop below).
            rows = rowind[sv]
            cols = colind[sv]
            p = np.zeros(len(rowind))
            A = sampled_kronecker_products.sampled_vec_trick(v, K2, K1, cols, rows, colind, rowind)
            p[sv] = A
            return p + lamb * v
        def rv(v):
            # Adjoint matvec for QMR, restricted to support vectors sv.
            rows = rowind[sv]
            cols = colind[sv]
            p = sampled_kronecker_products.sampled_vec_trick(v[sv], K2, K1, colind, rowind, cols, rows)
            return p + lamb * v
        for i in range(maxiter):
            P = sampled_kronecker_products.sampled_vec_trick(a, K2, K1, colind, rowind, colind, rowind)
            z = (1. - Y*P)
            z = np.where(z>0, z, 0)
            sv = np.nonzero(z)[0]
            # Right-hand side: residual on support vectors plus ridge term.
            B = np.zeros(P.shape)
            B[sv] = P[sv]-Y[sv]
            B = B + lamb*a
            #solve Ax = B
            A = LinearOperator((ddim, ddim), matvec=mv, rmatvec=rv, dtype=np.float64)
            self.a_new = qmr(A, B, tol=1e-10, maxiter=inneriter)[0]
            # Stop when the step no longer changes a.
            if np.all(a == a - self.a_new):
                break
            a = a - self.a_new
            if self.compute_risk:
                loss = func(a)
                if loss < self.bestloss:
                    self.A = a
                    self.bestloss = loss
            else:
                self.A = a
            self.predictor = KernelPairwisePredictor(a, rowind, colind)
            if self.callbackfun is not None:
                self.callbackfun.callback(self)
        self.predictor = KernelPairwisePredictor(a, rowind, colind)
    if self.callbackfun is not None:
        self.callbackfun.finished(self)
def mv(v):
    # Regularized kernel operator: K v + regparam * v over the sampled
    # pairs defined by the instance's index vectors.
    Kv = sampled_kronecker_products.sampled_vec_trick(
        v, K2, K1, self.input2_inds, self.input1_inds,
        self.input2_inds, self.input1_inds)
    return Kv + regparam * v
def __init__(self, **kwargs):
    """Solve the pairwise (Kronecker) ridge regression problem with minres.

    Uses the dual formulation when kernel matrices 'K1'/'K2' are given
    (lists/tuples of kernels trigger multiple-kernel learning), otherwise
    the primal formulation with data matrices 'X1'/'X2'.
    """
    Y = kwargs["Y"]
    # Indices mapping each label to a row of the first / second domain.
    self.input1_inds = np.array(kwargs["label_row_inds"], dtype=np.int32)
    self.input2_inds = np.array(kwargs["label_col_inds"], dtype=np.int32)
    Y = array_tools.as_2d_array(Y)
    # NOTE(review): np.mat is deprecated in newer NumPy releases.
    self.Y = np.mat(Y)
    self.trained = False
    if "regparam" in kwargs:
        self.regparam = kwargs["regparam"]
    else:
        self.regparam = 0.
    if CALLBACK_FUNCTION in kwargs:
        self.callbackfun = kwargs[CALLBACK_FUNCTION]
    else:
        self.callbackfun = None
    if "compute_risk" in kwargs:
        self.compute_risk = kwargs["compute_risk"]
    else:
        self.compute_risk = False
    regparam = self.regparam
    if 'K1' in kwargs:
        # ---------- dual (kernel) case ----------
        K1 = kwargs['K1']
        K2 = kwargs['K2']
        if 'maxiter' in kwargs:
            maxiter = int(kwargs['maxiter'])
        else:
            maxiter = None
        Y = np.array(self.Y).ravel(order='F')
        self.bestloss = float("inf")
        def mv(v):
            # Single-kernel operator: K v + regparam * v.
            return sampled_kronecker_products.sampled_vec_trick(
                v, K2, K1, self.input2_inds, self.input1_inds,
                self.input2_inds, self.input1_inds) + regparam * v
        def mv_mk(v):
            # Multiple-kernel operator: regparam * v + sum_i w_i K_i v.
            vsum = regparam * v
            for i in range(len(K1)):
                K1i = K1[i]
                K2i = K2[i]
                vsum += weights[
                    i] * sampled_kronecker_products.sampled_vec_trick(
                        v, K2i, K1i, self.input2_inds, self.input1_inds,
                        self.input2_inds, self.input1_inds)
            return vsum
        def mvr(v):
            # rmatvec placeholder: minres is not expected to call it.
            raise Exception('You should not be here!')
        def cgcb(v):
            # Per-iteration callback: optionally track risk, keep best A.
            if self.compute_risk:
                P = sampled_kronecker_products.sampled_vec_trick(
                    v, K2, K1, self.input2_inds, self.input1_inds,
                    self.input2_inds, self.input1_inds)
                z = (Y - P)
                # NOTE(review): Ka repeats the exact computation of P
                # above — could be reused.
                Ka = sampled_kronecker_products.sampled_vec_trick(
                    v, K2, K1, self.input2_inds, self.input1_inds,
                    self.input2_inds, self.input1_inds)
                loss = (np.dot(z, z) + regparam * np.dot(v, Ka))
                print("loss", 0.5 * loss)
                if loss < self.bestloss:
                    self.A = v.copy()
                    self.bestloss = loss
            else:
                self.A = v
            if not self.callbackfun is None:
                self.predictor = KernelPairwisePredictor(
                    self.A, self.input1_inds, self.input2_inds)
                self.callbackfun.callback(self)
        if isinstance(K1, (list, tuple)):
            if 'weights' in kwargs:
                weights = kwargs['weights']
            else:
                weights = np.ones((len(K1)))
            G = LinearOperator(
                (len(self.input1_inds), len(self.input1_inds)),
                matvec=mv_mk, rmatvec=mvr, dtype=np.float64)
        else:
            weights = None
            G = LinearOperator(
                (len(self.input1_inds), len(self.input1_inds)),
                matvec=mv, rmatvec=mvr, dtype=np.float64)
        # NOTE(review): passes the 2-D self.Y rather than the raveled Y
        # computed above — confirm intended.
        self.A = minres(G, self.Y, maxiter=maxiter, callback=cgcb, tol=1e-20)[0]
        self.predictor = KernelPairwisePredictor(
            self.A, self.input1_inds, self.input2_inds, weights)
    else:
        # ---------- primal (linear) case ----------
        X1 = kwargs['X1']
        X2 = kwargs['X2']
        self.X1, self.X2 = X1, X2
        if 'maxiter' in kwargs:
            maxiter = int(kwargs['maxiter'])
        else:
            maxiter = None
        if isinstance(X1, (list, tuple)):
            raise NotImplementedError(
                "Got list or tuple as X1 but multiple kernel learning has not been implemented for the proal case yet."
            )
            # NOTE(review): unreachable after the raise above; the message
            # typo "proal" presumably means "primal".
            x1tsize, x1fsize = X1[0].shape #m, d
            x2tsize, x2fsize = X2[0].shape #q, r
        else:
            x1tsize, x1fsize = X1.shape #m, d
            x2tsize, x2fsize = X2.shape #q, r
        kronfcount = x1fsize * x2fsize
        Y = np.array(self.Y).ravel(order='F')
        self.bestloss = float("inf")
        def mv(v):
            # Normal-equations operator: X^T X v + regparam * v.
            v_after = sampled_kronecker_products.sampled_vec_trick(
                v, X2, X1, self.input2_inds, self.input1_inds)
            v_after = sampled_kronecker_products.sampled_vec_trick(
                v_after, X2.T, X1.T, None, None, self.input2_inds,
                self.input1_inds) + regparam * v
            return v_after
        def mv_mk(v):
            # Multiple-matrix variant of the normal-equations operator.
            vsum = regparam * v
            for i in range(len(X1)):
                X1i = X1[i]
                X2i = X2[i]
                v_after = sampled_kronecker_products.sampled_vec_trick(
                    v, X2i, X1i, self.input2_inds, self.input1_inds)
                v_after = sampled_kronecker_products.sampled_vec_trick(
                    v_after, X2i.T, X1i.T, None, None, self.input2_inds,
                    self.input1_inds)
                vsum = vsum + v_after
            return vsum
        def mvr(v):
            # rmatvec placeholder: minres is not expected to call it.
            raise Exception('You should not be here!')
            return None
        def cgcb(v):
            # Per-iteration callback: optionally track risk, keep best W.
            if self.compute_risk:
                P = sampled_kronecker_products.sampled_vec_trick(
                    v, X2, X1, self.input2_inds, self.input1_inds)
                z = (Y - P)
                loss = (np.dot(z, z) + regparam * np.dot(v, v))
                if loss < self.bestloss:
                    self.W = v.copy().reshape((x1fsize, x2fsize), order='F')
                    self.bestloss = loss
            else:
                self.W = v.reshape((x1fsize, x2fsize), order='F')
            if not self.callbackfun is None:
                self.predictor = LinearPairwisePredictor(self.W)
                self.callbackfun.callback(self)
        if isinstance(X1, (list, tuple)):
            G = LinearOperator((kronfcount, kronfcount),
                               matvec=mv_mk, rmatvec=mvr, dtype=np.float64)
            # Right-hand side: sum of back-projected labels per matrix pair.
            vsum = np.zeros(kronfcount)
            v_init = np.array(self.Y).reshape(self.Y.shape[0])
            for i in range(len(X1)):
                X1i = X1[i]
                X2i = X2[i]
                vsum += sampled_kronecker_products.sampled_vec_trick(
                    v_init, X2i.T, X1i.T, None, None, self.input2_inds,
                    self.input1_inds)
            v_init = vsum
        else:
            G = LinearOperator((kronfcount, kronfcount),
                               matvec=mv, rmatvec=mvr, dtype=np.float64)
            # Right-hand side of the normal equations: X^T y.
            v_init = np.array(self.Y).reshape(self.Y.shape[0])
            v_init = sampled_kronecker_products.sampled_vec_trick(
                v_init, X2.T, X1.T, None, None, self.input2_inds,
                self.input1_inds)
        v_init = np.array(v_init).reshape(kronfcount)
        if 'warm_start' in kwargs:
            x0 = np.array(kwargs['warm_start']).reshape(kronfcount, order='F')
        else:
            x0 = None
        # NOTE(review): the minres result is computed but never assigned —
        # self.W is only set inside the cgcb callback. If minres never
        # invokes the callback, self.W is undefined below; confirm whether
        # the reshaped result should be assigned to self.W here.
        minres(G, v_init, x0=x0, maxiter=maxiter, callback=cgcb,
               tol=1e-20)[0].reshape((x1fsize, x2fsize), order='F')
        self.predictor = LinearPairwisePredictor(self.W)
    if not self.callbackfun is None:
        self.callbackfun.finished(self)
def rv(v):
    # Adjoint of the support-vector-restricted matvec (closure over sv,
    # rowind, colind, K1, K2, lamb), with the ridge term added.
    sv_rows = rowind[sv]
    sv_cols = colind[sv]
    adj = sampled_kronecker_products.sampled_vec_trick(
        v[sv], K2, K1, colind, rowind, sv_cols, sv_rows)
    return adj + lamb * v
def func(a):
    # Dual objective value at coefficient vector a (closure over K1, K2,
    # Y, rowind, colind, lamb): 0.5 * (squared hinge loss + lamb * a'Ka).
    # K a over the sampled pairs serves both as the predictions P and as
    # the quadratic-form factor Ka; the original issued the exact same
    # sampled_vec_trick call twice — compute it once and reuse.
    Ka = sampled_kronecker_products.sampled_vec_trick(a, K2, K1, colind, rowind, colind, rowind)
    P = Ka
    z = (1. - Y*P)
    z = np.where(z>0, z, 0)
    return 0.5*(np.dot(z,z)+lamb*np.dot(a, Ka))