Example #1
 def decompositionFromPool(self, rpool):
     kernel = rpool['kernel_obj']
     self.X = array_tools.as_2d_array(rpool['X'], True)
     if 'basis_vectors' in rpool:
         basis_vectors = array_tools.as_2d_array(rpool['basis_vectors'],
                                                 True)
         if not self.X.shape[1] == basis_vectors.shape[1]:
             raise Exception(
                 "X and basis_vectors have different number of columns")
     else:
         basis_vectors = None
     if "bias" in rpool:
         self.bias = float(rpool["bias"])
     else:
         self.bias = 1.
     if basis_vectors is not None or self.X.shape[1] > self.X.shape[0]:
         #First possibility: subset of regressors has been invoked
         if basis_vectors is not None:
             K_r = kernel.getKM(self.X).T
             Krr = kernel.getKM(basis_vectors)
             svals, evecs, U, Z = decomposeSubsetKM(K_r, Krr)
         #Second possibility: dual mode if more attributes than examples
         else:
             K = kernel.getKM(self.X).T
             svals, evecs = linalg.eig_psd(K)
             U, Z = None, None
     #Third possibility, primal decomposition
     else:
         #Invoking getPrimalDataMatrix adds the bias feature
         X = getPrimalDataMatrix(self.X, self.bias)
         evecs, svals, U = linalg.svd_economy_sized(X)
         U, Z = None, None
     return svals, evecs, U, Z
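
The three branches above return the same kind of decomposition through different routes. A standalone NumPy sketch (not RLScore's linalg module; data is hypothetical) of why the primal and dual routes agree for a linear kernel: the economy-sized SVD of X and the eigendecomposition of K = X X^T share their spectrum.

import numpy as np

# Hypothetical data; in RLScore these roles are played by rpool['X'] and the
# kernel matrix returned by kernel.getKM.
rng = np.random.default_rng(0)
X = rng.standard_normal((5, 3))                        # n_samples x n_features

U, svals, Vt = np.linalg.svd(X, full_matrices=False)   # primal route (SVD of X)
K = X @ X.T                                            # dual route (linear kernel)
evals = np.linalg.eigvalsh(K)[::-1][:3]                # top eigenvalues of PSD K

print(np.allclose(svals ** 2, evals))                  # True: spectra coincide
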
Example #2
def accuracy(Y, P):
    """Binary classification accuracy.
    
    A performance measure for binary classification problems.
    Returns the fraction of correct class predictions. P[i]>0 is
    considered a positive class prediction and P[i]<0 negative.
    P[i]==0 is interpreted as the classifier abstaining from making
    a decision, which incurs an error of 0.5 (in contrast to 0 error
    for a correct and 1 error for an incorrect prediction).
    
    If 2-dimensional arrays are supplied as arguments, then accuracy
    is separately computed for each column, after which the accuracies
    are averaged.
    
    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, must belong to set {-1,1}
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, can be any real numbers. 
    
    Returns
    -------
    accuracy : float
        number between 0 and 1
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(accuracy_multitask(Y, P))
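
A direct NumPy sketch of the semantics documented above (a hypothetical helper, not the library's accuracy_multitask): a correct sign scores 1, a wrong sign 0, and an abstaining prediction P[i] == 0 scores 0.5.

import numpy as np

def accuracy_sketch(Y, P):
    Y = np.asarray(Y, dtype=float)
    P = np.asarray(P, dtype=float)
    correct = (np.sign(P) == Y).astype(float)  # 1 where the sign matches Y
    correct[P == 0] = 0.5                      # abstention costs half an error
    return correct.mean()

print(accuracy_sketch([1, -1, 1, -1], [0.9, -0.1, 0.0, 0.4]))  # 0.625
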
Example #3
def sqmprank(Y, P):
    """Squared magnitude preserving ranking error.
    
    A performance measure for ranking problems. Computes the sum of
    (Y[i]-Y[j]-P[i]+P[j])**2 over all index pairs, normalized by the
    number of pairs. For query-structured data, one would typically
    compute the error separately for each query and average.
    
    If 2-dimensional arrays are supplied as arguments, then error is separately computed for
    each column, after which the errors are averaged.
    
    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct utility values, can be any real numbers
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted utility values, can be any real numbers. 
    
    Returns
    -------
    error : float
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(sqmprank_multitask(Y, P))
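
An O(n**2) reference sketch of the measure for a single output column (hypothetical code, not sqmprank_multitask; the library's exact pair-count normalization may differ from the ordered-pair count used here).

import numpy as np

def sqmprank_sketch(Y, P):
    Y = np.asarray(Y, dtype=float)
    P = np.asarray(P, dtype=float)
    dY = Y[:, None] - Y[None, :]        # all pairwise label differences
    dP = P[:, None] - P[None, :]        # all pairwise prediction differences
    n = len(Y)
    # diagonal terms are zero, so summing the full matrix is harmless
    return np.sum((dY - dP) ** 2) / (n * (n - 1))

print(sqmprank_sketch([1., 2., 3.], [1., 2., 2.]))  # 0.666...
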
Example #4
def cindex(Y, P):
    """Concordance, aka pairwise ranking accuracy. Computes the
    relative fraction of concordant pairs, that is, Y[i] > Y[j]
    and P[i] > P[j] (ties with P[i]=P[j] are assumed to be broken
    randomly). Equivalent to area under ROC curve, if Y[i] belong
    to {-1, 1}. An O(n*log(n)) implementation, based on order
    statistic tree computations.
    
    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, can be any real numbers. 
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, can be any real numbers. 
    
    Returns
    -------
    concordance index : float
        number between 0 and 1, around 0.5 means random performance
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    perfs = cindex_multitask(Y,P)
    perfs = np.array(perfs)
    perfs = perfs[np.invert(np.isnan(perfs))]
    if len(perfs) == 0:
        raise UndefinedPerformance("No pairs, all the instances have the same output")
    return np.mean(perfs)
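
A naive O(n**2) reference for the quantity described above (hypothetical; the library computes the same value with an O(n*log(n)) order statistic tree).

import numpy as np

def cindex_sketch(Y, P):
    Y = np.asarray(Y, dtype=float)
    P = np.asarray(P, dtype=float)
    score, pairs = 0.0, 0
    for i in range(len(Y)):
        for j in range(len(Y)):
            if Y[i] > Y[j]:            # only pairs with differing labels count
                pairs += 1
                if P[i] > P[j]:
                    score += 1.0
                elif P[i] == P[j]:
                    score += 0.5       # expected value under random tie-breaking
    if pairs == 0:
        raise ValueError("all labels identical, concordance undefined")
    return score / pairs

print(cindex_sketch([1, 2, 3, 4], [1, 3, 2, 4]))  # 5/6 = 0.833...
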
Example #5
def fscore(Y, P):
    """F1-Score.
    
    A performance measure for binary classification problems.
    F1 = 2*(Precision*Recall)/(Precision+Recall)
    
    If 2-dimensional arrays are supplied as arguments, then macro-averaged
    F-score is computed over the columns.
    
    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, must belong to set {-1,1}
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, can be any real numbers. P[i]>0 is treated
        as a positive, and P[i]<=0 as a negative class prediction.
    
    Returns
    -------
    fscore : float
        number between 0 and 1
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(fscore_multitask(Y,P))
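
The same F1 computation written out directly for one output column (a hypothetical helper, not the library's fscore_multitask).

import numpy as np

def fscore_sketch(Y, P):
    Y = np.asarray(Y)
    pred = np.where(np.asarray(P) > 0, 1, -1)  # P > 0 positive, P <= 0 negative
    tp = np.sum((pred == 1) & (Y == 1))
    fp = np.sum((pred == 1) & (Y == -1))
    fn = np.sum((pred == -1) & (Y == 1))
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * precision * recall / (precision + recall)

print(fscore_sketch([1, 1, -1, -1], [0.5, -0.5, 0.5, -0.5]))  # 0.5
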
Example #6
def auc(Y, P):
    """Area under the ROC curve (AUC).
    
    A performance measure for binary classification problems.
    Can be interpreted as an estimate of the probability that the
    classifier is able to discriminate between a randomly drawn
    positive and a randomly drawn negative training example. An
    O(n*log(n)) time implementation, with correction for tied
    predictions.
    
    If 2-dimensional arrays are supplied as arguments, then AUC
    is separately computed for each column, after which the AUCs
    are averaged.
    
    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, must belong to set {-1,1}
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, can be any real numbers. 
    
    Returns
    -------
    auc : float
        number between 0 and 1
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(auc_multitask(Y,P))
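
AUC as pairwise concordance over positive-negative pairs, with 0.5 credit for ties: an O(n_pos*n_neg) sketch of the quantity the docstring describes (the library computes the same value in O(n*log(n))).

import numpy as np

def auc_sketch(Y, P):
    Y = np.asarray(Y, dtype=float)
    P = np.asarray(P, dtype=float)
    pos, neg = P[Y == 1], P[Y == -1]
    greater = (pos[:, None] > neg[None, :]).sum()   # correctly ordered pairs
    ties = (pos[:, None] == neg[None, :]).sum()     # tied predictions
    return (greater + 0.5 * ties) / (len(pos) * len(neg))

print(auc_sketch([1, 1, -1, -1], [0.9, 0.4, 0.4, 0.1]))  # 0.875
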
Example #7
def fscore(Y, P):
    """F1-Score.
    
    A performance measure for binary classification problems.
    F1 = 2*(Precision*Recall)/(Precision+Recall)
    
    If 2-dimensional arrays are supplied as arguments, then macro-averaged
    F-score is computed over the columns.
    
    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, must belong to set {-1,1}
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, can be any real numbers. P[i]>0 is treated
        as a positive, and P[i]<=0 as a negative class prediction.
    
    Returns
    -------
    fscore : float
        number between 0 and 1
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(fscore_multitask(Y, P))
Example #8
 def decompositionFromPool(self, rpool):
     kernel = rpool['kernel_obj']
     self.X = array_tools.as_2d_array(rpool['X'], True)
     if 'basis_vectors' in rpool:
         basis_vectors = array_tools.as_2d_array(rpool['basis_vectors'], True)
         if not self.X.shape[1] == basis_vectors.shape[1]:
             raise Exception("X and basis_vectors have different number of columns")
     else:
         basis_vectors = None
     if "bias" in rpool:
         self.bias = float(rpool["bias"])
     else:
         self.bias = 1.
     if basis_vectors is not None or self.X.shape[1] > self.X.shape[0]:
         #First possibility: subset of regressors has been invoked
         if basis_vectors is not None:
             K_r = kernel.getKM(self.X).T
             Krr = kernel.getKM(basis_vectors)
             svals, evecs, U, Z = decomposeSubsetKM(K_r, Krr)
         #Second possibility: dual mode if more attributes than examples
         else:
             K = kernel.getKM(self.X).T
             svals, evecs = linalg.eig_psd(K)
             U, Z = None, None
     #Third possibility, primal decomposition
     else:
         #Invoking getPrimalDataMatrix adds the bias feature
         X = getPrimalDataMatrix(self.X, self.bias)
         evecs, svals, U = linalg.svd_economy_sized(X)
         U, Z = None, None
     return svals, evecs, U, Z
Example #9
 def cv_old(self, regparam):
     rls = self.rls
     rls.solve(regparam)
     Y = rls.Y
     aucs = []
     for k in range(Y.shape[1]):
         pairs_start_inds, pairs_end_inds = [], []
         for i in range(Y.shape[0] - 1):
             for j in range(i + 1, Y.shape[0]):
                 if Y[i,k] > Y[j,k]:
                     pairs_start_inds.append(i)
                     pairs_end_inds.append(j)
                 elif Y[i,k] < Y[j,k]:
                     pairs_start_inds.append(j)
                     pairs_end_inds.append(i)
         if len(pairs_start_inds) == 0:
             raise UndefinedPerformance("Leave-pair-out undefined, all labels same for output %d" %k)
         pred_start, pred_end = rls.leave_pair_out(np.array(pairs_start_inds), np.array(pairs_end_inds))
         pred_start = array_tools.as_2d_array(pred_start)
         pred_end = array_tools.as_2d_array(pred_end)
         auc = 0.
         for h in range(len(pred_start)):
             if pred_start[h,k] > pred_end[h,k]:
                 auc += 1.
             elif pred_start[h,k] == pred_end[h,k]:
                 auc += 0.5
         auc /= len(pairs_start_inds)
         aucs.append(auc)
     auc = np.mean(aucs)
     return auc, None
Example #10
def sqerror(Y, P):
    """Mean squared error.
    
    A performance measure for regression problems. Computes the sum of (Y[i]-P[i])**2
    over all index pairs, normalized by the number of instances.
    
    If 2-dimensional arrays are supplied as arguments, then error is separately computed for
    each column, after which the errors are averaged.
    
    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_tasks]
        Correct utility values, can be any real numbers
    P : {array-like}, shape = [n_samples] or [n_samples, n_tasks]
        Predicted utility values, can be any real numbers. 
    
    Returns
    -------
    error : float
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(sqerror_multitask(Y,P))
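
For 1-D inputs the documented quantity reduces to the familiar mean squared error; a one-line equivalent sketch (hypothetical, not sqerror_multitask):

import numpy as np

Y = np.array([1.0, 2.0, 3.0])
P = np.array([1.5, 2.0, 2.0])
print(np.mean((Y - P) ** 2))  # 0.4166...
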
Example #11
def cindex(Y, P):
    """Concordance, aka pairwise ranking accuracy. Computes the
    relative fraction of concordant pairs, that is, Y[i] > Y[j]
    and P[i] > P[j] (ties with P[i]=P[j] are assumed to be broken
    randomly). Equivalent to area under ROC curve, if Y[i] belong
    to {-1, 1}. An O(n*log(n)) implementation, based on order
    statistic tree computations.
    
    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, can be any real numbers. 
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, can be any real numbers. 
    
    Returns
    -------
    concordance index : float
        number between 0 and 1, around 0.5 means random performance
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    perfs = cindex_multitask(Y, P)
    perfs = np.array(perfs)
    perfs = perfs[np.invert(np.isnan(perfs))]
    if len(perfs) == 0:
        raise UndefinedPerformance(
            "No pairs, all the instances have the same output")
    return np.mean(perfs)
Example #12
def auc(Y, P):
    """Area under the ROC curve (AUC).
    
    A performance measure for binary classification problems.
    Can be interpreted as an estimate of the probability that the
    classifier is able to discriminate between a randomly drawn
    positive and a randomly drawn negative training example. An
    O(n*log(n)) time implementation, with correction for tied
    predictions.
    
    If 2-dimensional arrays are supplied as arguments, then AUC
    is separately computed for each column, after which the AUCs
    are averaged.
    
    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, must belong to set {-1,1}
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, can be any real numbers. 
    
    Returns
    -------
    auc : float
        number between 0 and 1
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(auc_multitask(Y, P))
Example #13
File: rls.py Project: disc5/RLScore
 def cv(self, regparam):
     rls = self.rls
     rls.solve(regparam)
     Y = rls.Y
     #Union of all pairs for which predictions are needed
     all_pairs = set([])
     for k in range(Y.shape[1]):
         pairs = []
         for i in range(Y.shape[0] - 1):
             for j in range(i + 1, Y.shape[0]):
                 if Y[i, k] != Y[j, k]:
                     pairs.append((i, j))
         #If all labels for some column are same, ranking accuracy is undefined
         if len(pairs) == 0:
             raise UndefinedPerformance(
                 "Leave-pair-out undefined, all labels same for output %d" %
                 k)
         all_pairs.update(pairs)
     all_start_inds = [x[0] for x in all_pairs]
     all_end_inds = [x[1] for x in all_pairs]
     #Compute leave-pair-out predictions for all pairs
     all_start_inds = np.array(all_start_inds)
     all_end_inds = np.array(all_end_inds)
     pred_start, pred_end = rls.leave_pair_out(all_start_inds, all_end_inds)
     pred_start = array_tools.as_2d_array(pred_start)
     pred_end = array_tools.as_2d_array(pred_end)
     pair_dict = dict(zip(all_pairs, range(pred_start.shape[0])))
     aucs = []
     #compute auc/ranking accuracy for each column of Y separately
     for k in range(Y.shape[1]):
         comparisons = []
         #1 if the true and predicted agree, 0 if disagree, 0.5 if predictions tied
         for i in range(Y.shape[0] - 1):
             for j in range(i + 1, Y.shape[0]):
                 if Y[i, k] > Y[j, k]:
                     ind = pair_dict[(i, j)]
                     if pred_start[ind, k] > pred_end[ind, k]:
                         comparisons.append(1.)
                     elif pred_start[ind, k] == pred_end[ind, k]:
                         comparisons.append(0.5)
                     else:
                         comparisons.append(0.)
                 elif Y[i, k] < Y[j, k]:
                     ind = pair_dict[(i, j)]
                     if pred_start[ind, k] < pred_end[ind, k]:
                         comparisons.append(1.)
                     elif pred_start[ind, k] == pred_end[ind, k]:
                         comparisons.append(0.5)
                     else:
                         comparisons.append(0.)
         auc = np.mean(comparisons)
         aucs.append(auc)
     #Take the mean of all columnwise aucs
     auc = np.mean(aucs)
     return auc, None
Example #14
 def cv(self, regparam):
     rls = self.rls
     rls.solve(regparam)
     Y = rls.Y
     #Union of all pairs for which predictions are needed
     all_pairs = set([])
     for k in range(Y.shape[1]):
         pairs = []
         for i in range(Y.shape[0] - 1):
             for j in range(i + 1, Y.shape[0]):
                 if Y[i,k] != Y[j,k]:
                     pairs.append((i,j))
         #If all labels for some column are same, ranking accuracy is undefined
         if len(pairs) == 0:
             raise UndefinedPerformance("Leave-pair-out undefined, all labels same for output %d" %k)
         all_pairs.update(pairs)
     all_start_inds = [x[0] for x in all_pairs]
     all_end_inds = [x[1] for x in all_pairs]
     #Compute leave-pair-out predictions for all pairs
     all_start_inds = np.array(all_start_inds)
     all_end_inds = np.array(all_end_inds)
     pred_start, pred_end = rls.leave_pair_out(all_start_inds, all_end_inds)
     pred_start = array_tools.as_2d_array(pred_start)
     pred_end = array_tools.as_2d_array(pred_end)
     pair_dict = dict(zip(all_pairs, range(pred_start.shape[0])))
     aucs = []
     #compute auc/ranking accuracy for each column of Y separately
     for k in range(Y.shape[1]):
         comparisons = []
         #1 if the true and predicted agree, 0 if disagree, 0.5 if predictions tied
         for i in range(Y.shape[0] - 1):
             for j in range(i + 1, Y.shape[0]):
                 if Y[i,k] > Y[j,k]:
                     ind = pair_dict[(i,j)]
                     if pred_start[ind,k] > pred_end[ind,k]:
                         comparisons.append(1.)
                     elif pred_start[ind,k] == pred_end[ind,k]:
                         comparisons.append(0.5)
                     else:
                         comparisons.append(0.)
                 elif Y[i,k] < Y[j,k]:
                     ind = pair_dict[(i,j)]
                     if pred_start[ind,k] < pred_end[ind,k]:
                         comparisons.append(1.)
                     elif pred_start[ind,k] == pred_end[ind,k]:
                         comparisons.append(0.5)
                     else:
                         comparisons.append(0.)
         auc = np.mean(comparisons)
         aucs.append(auc)
     #Take the mean of all columnwise aucs
     auc = np.mean(aucs)
     return auc, None
Example #15
 def __init__(self,
              X,
              Y,
              regparam=1.0,
              kernel='LinearKernel',
              basis_vectors=None,
              **kwargs):
     Y = array_tools.as_2d_array(Y)
     self.Y = np.mat(Y)
     if X.shape[0] != Y.shape[0]:
         raise Exception("First dimension of X and Y must be the same")
     if basis_vectors is not None:
         if X.shape[1] != basis_vectors.shape[1]:
             raise Exception(
                 "Number of columns for X and basis_vectors must be the same"
             )
     kwargs["bias"] = 0.
     kwargs['kernel'] = kernel
     kwargs['X'] = X
     if basis_vectors is not None:
         kwargs['basis_vectors'] = basis_vectors
     self.svdad = creators.createSVDAdapter(**kwargs)
     self.regparam = regparam
     self.svals = self.svdad.svals
     self.svecs = self.svdad.rsvecs
     self.size = self.Y.shape[0]
     self.solve(self.regparam)
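
Why the adapter keeps singular values and vectors around: once X = U S V^T is known, the regularized solution can be re-solved for any new regparam at negligible cost. A standalone NumPy sketch of that identity for the linear/primal case, with hypothetical data (RLScore stores the analogous quantities in svals and svecs):

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((20, 5))
y = rng.standard_normal(20)
lam = 2.0

U, s, Vt = np.linalg.svd(X, full_matrices=False)
# ridge solution via the SVD: w = V diag(s / (s**2 + lam)) U^T y
w_svd = Vt.T @ ((s / (s ** 2 + lam)) * (U.T @ y))
w_direct = np.linalg.solve(X.T @ X + lam * np.eye(5), X.T @ y)
print(np.allclose(w_svd, w_direct))  # True; re-solving for a new lam is cheap
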
Example #16
 def getKM(self, X):
     """Returns the kernel matrix between the basis vectors and X.
     
     Parameters
     ----------
     X : {array-like, sparse matrix}, shape = [n_samples, n_features]
     
     Returns
     -------
     K : array, shape = [n_samples, n_bvectors]
         kernel matrix
     """
     X = array_tools.as_2d_array(X, True)
     test_X = X
     if sp.issparse(test_X):
         test_X = array_tools.spmat_resize(test_X, self.train_X.shape[1])
     else:
         test_X = array_tools.as_dense_matrix(test_X)
     gamma = self.gamma
     m = self.train_X.shape[0]
     n = test_X.shape[0]
     #The Gaussian kernel matrix is constructed from a linear kernel matrix
     linkm = self.train_X * test_X.T
     linkm = array_tools.as_dense_matrix(linkm)
     if sp.issparse(test_X):
         test_norms = ((test_X.T.multiply(test_X.T)).sum(axis=0)).T
     else:
         test_norms = (np.multiply(test_X.T, test_X.T).sum(axis=0)).T
     K = mat(np.ones((m, 1), dtype=float64)) * test_norms.T
     K = K + self.train_norms * mat(np.ones((1, n), dtype=float64))
     K = K - 2 * linkm
     K = -gamma * K
     K = np.exp(K)
     return K.A.T
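
A dense NumPy sketch of the expansion the method uses: the Gaussian kernel is assembled from a linear kernel via ||x - z||**2 = ||x||**2 + ||z||**2 - 2*x.z (hypothetical standalone code assuming dense 2-D float arrays).

import numpy as np

def gaussian_km_sketch(train_X, test_X, gamma):
    train_norms = (train_X ** 2).sum(axis=1)[:, None]  # shape (m, 1)
    test_norms = (test_X ** 2).sum(axis=1)[None, :]    # shape (1, n)
    sqdists = train_norms + test_norms - 2 * train_X @ test_X.T
    return np.exp(-gamma * sqdists).T                  # (n_samples, n_bvectors)

rng = np.random.default_rng(0)
A, B = rng.standard_normal((4, 3)), rng.standard_normal((2, 3))
print(gaussian_km_sketch(A, B, gamma=0.5).shape)  # (2, 4)
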
Example #17
 def __init__(self, X, Y, subsetsize, regparam = 1.0, bias=1.0, measure=None, callbackfun=None, **kwargs):
     self.callbackfun = callbackfun
     self.regparam = regparam
     if isinstance(X, sp.base.spmatrix):
         self.X = X.todense()
     else:
         self.X = X
     self.X = self.X.T
     self.Y = array_tools.as_2d_array(Y)
     #Number of training examples
     self.size = self.Y.shape[0]
     #if not self.Y.shape[1] == 1:
     #    raise Exception('GreedyRLS currently supports only one output at a time. The output matrix is now of shape ' + str(self.Y.shape) + '.')
     self.bias = bias
     self.measure = measure
     fsize = X.shape[1]
     self.desiredfcount = subsetsize
     if not fsize >= self.desiredfcount:
         raise Exception('The overall number of features ' + str(fsize) + ' is smaller than the desired number ' + str(self.desiredfcount) + ' of features to be selected.')
     self.results = {}
     ##The current version works only with the squared error measure
     #self.measure = None
     #self.solve_bu(self.regparam)
     #return
     #if not self.Y.shape[1] == 1:
     self.solve_weak(self.regparam)
Example #18
 def __init__(self,
              X,
              Y,
              subsetsize,
              regparam=1.0,
              bias=1.0,
              callbackfun=None,
              **kwargs):
     self.callbackfun = callbackfun
     self.regparam = regparam
     if isinstance(X, sp.base.spmatrix):
         self.X = X.todense()
     else:
         self.X = X
     self.X = self.X.T
     self.X = self.X.astype("float64", copy=False)
     self.Y = np.mat(array_tools.as_2d_array(Y))
     #Number of training examples
     self.size = self.Y.shape[0]
     self.bias = bias
     self.measure = None
     fsize = X.shape[1]
     self.desiredfcount = subsetsize
     if not fsize >= self.desiredfcount:
         raise Exception('The overall number of features ' + str(fsize) +
                         ' is smaller than the desired number ' +
                         str(self.desiredfcount) +
                         ' of features to be selected.')
     self.results = {}
     if 'use_default_callback' in kwargs and bool(
             kwargs['use_default_callback']):
         self.callbackfun = DefaultCallback(**kwargs)
     #The current version works only with the squared error measure
     self._solve_cython(self.regparam)
Example #19
 def getKM(self, X):
     """Returns the kernel matrix between the basis vectors and X.
     
     Parameters
     ----------
     X : {array-like, sparse matrix}, shape = [n_samples, n_features]
     
     Returns
     -------
     K : array, shape = [n_samples, n_bvectors]
         kernel matrix
     """
     X = array_tools.as_2d_array(X, True)
     test_X = X 
     if sp.issparse(test_X):
         test_X = array_tools.spmat_resize(test_X, self.train_X.shape[1])
     else:
         test_X = array_tools.as_dense_matrix(test_X)
     gamma = self.gamma
     m = self.train_X.shape[0]
     n = test_X.shape[0]
     #The Gaussian kernel matrix is constructed from a linear kernel matrix
     linkm = self.train_X * test_X.T
     linkm = array_tools.as_dense_matrix(linkm)
     if sp.issparse(test_X):
         test_norms = ((test_X.T.multiply(test_X.T)).sum(axis=0)).T
     else:
         test_norms = (np.multiply(test_X.T, test_X.T).sum(axis=0)).T
     K = mat(np.ones((m, 1), dtype = float64)) * test_norms.T
     K = K + self.train_norms * mat(np.ones((1, n), dtype = float64))
     K = K - 2 * linkm
     K = - gamma * K
     K = np.exp(K)
     return K.A.T
Example #20
 def getKM(self, X):
     """Returns the kernel matrix between the basis vectors and X.
     
     Parameters
     ----------
     X : {array-like, sparse matrix}, shape = [n_samples, n_features]
     
     Returns
     -------
     K : array, shape = [n_samples, n_bvectors]
         kernel matrix
     """
     X = array_tools.as_2d_array(X, True)
     test_X = X
     degree, coef0, gamma = self.degree, self.coef0, self.gamma
     if sp.issparse(test_X):
         test_X = array_tools.spmat_resize(test_X, self.train_X.shape[1])
     else:
         test_X = array_tools.as_dense_matrix(test_X)
     train_X = self.train_X
     K = array_tools.as_array(train_X * test_X.T)
     K *= gamma
     K += coef0
     K = K ** degree
     return K.T
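
The same polynomial kernel, (gamma * <x, z> + coef0) ** degree, as a minimal dense sketch (hypothetical standalone code, not the class above):

import numpy as np

def poly_km_sketch(train_X, test_X, degree=2, coef0=1.0, gamma=1.0):
    K = gamma * (train_X @ test_X.T) + coef0
    return (K ** degree).T          # rows index test examples, as in getKM

rng = np.random.default_rng(0)
A, B = rng.standard_normal((4, 3)), rng.standard_normal((2, 3))
print(poly_km_sketch(A, B).shape)   # (2, 4)
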
Example #21
 def __init__(self, X_valid, Y_valid, measure=sqerror, maxiter=10):
     self.X_valid = array_tools.as_matrix(X_valid)
     self.Y_valid = array_tools.as_2d_array(Y_valid)
     self.measure = measure
     self.bestperf = None
     self.bestA = None
     self.iter = 0
     self.last_update = 0
     self.maxiter = maxiter
Example #22
 def __init__(self, X, gamma=1.0):
     X = array_tools.as_2d_array(X, True)
     if gamma <= 0.:
         raise Exception('ERROR: nonpositive kernel parameter for Gaussian kernel\n')
     self.train_X = X
     if sp.issparse(self.train_X):
         self.train_norms = ((self.train_X.T.multiply(self.train_X.T)).sum(axis=0)).T
     else:
         self.train_norms = np.mat((np.multiply(self.train_X.T, self.train_X.T).sum(axis=0))).T  
     self.gamma = gamma
Example #23
 def __init__(self, X_valid, Y_valid, qids_valid = None, measure=sqmprank, maxiter=10):
     self.X_valid = array_tools.as_matrix(X_valid)
     self.Y_valid = array_tools.as_2d_array(Y_valid)
     self.qids_valid = qids_to_splits(qids_valid)
     self.measure = measure
     self.bestperf = None
     self.bestA = None
     self.iter = 0
     self.last_update = 0
     self.maxiter = maxiter
Example #24
def spearman(Y, P):
    """Spearman correlation.
    
    
    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels
    
    Returns
    -------
    correlation : float
        number between -1 and 1
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(spearman_multitask(Y, P))
Example #25
 def __init__(self, X, Y, regparam = 1.0, qids = None, callbackfun=None, **kwargs):
     self.regparam = regparam
     self.callbackfun = None
     self.Y = array_tools.as_2d_array(Y)
     #Number of training examples
     self.size = Y.shape[0]
     if self.Y.shape[1] > 1:
         raise Exception('CGRankRLS does not currently work in multi-label mode')
     self.learn_from_labels = True
     self.callbackfun = callbackfun
     self.X = csc_matrix(X.T)
     if qids is not None:
         self.qids = map_qids(qids)
         self.splits = qids_to_splits(self.qids)
     else:
         self.qids = None
     regparam = self.regparam
     qids = self.qids
     if qids is not None:
         P = sp.lil_matrix((self.size, len(set(qids))))
         for qidind in range(len(self.splits)):
             inds = self.splits[qidind]
             qsize = len(inds)
             for i in inds:
                 P[i, qidind] = 1. / sqrt(qsize)
         P = P.tocsr()
         PT = P.tocsc().T
     else:
         P = 1./sqrt(self.size)*(np.mat(np.ones((self.size,1), dtype=np.float64)))
         PT = P.T
     X = self.X.tocsc()
     X_csr = X.tocsr()
     def mv(v):
         v = np.mat(v).T
         return X_csr*(X.T*v)-X_csr*(P*(PT*(X.T*v)))+regparam*v
     G = LinearOperator((X.shape[0],X.shape[0]), matvec=mv, dtype=np.float64)
     Y = self.Y
     if not self.callbackfun is None:
         def cb(v):
             self.A = np.mat(v).T
             self.b = np.mat(np.zeros((1,1)))
             self.callbackfun.callback(self)
     else:
         cb = None
     XLY = X_csr*Y-X_csr*(P*(PT*Y))
     try:
         self.A = np.mat(cg(G, XLY, callback=cb)[0]).T
     except Finished:
         pass
     self.b = np.mat(np.zeros((1,1)))
     self.predictor = predictor.LinearPredictor(self.A, self.b)
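
The P built above stacks normalized indicator vectors of the queries, so the mv operator applies X (I - P P^T) X^T + regparam*I; that is, the data are centered within each query before the normal equations are formed. A small sketch of that effect for a single query, where P reduces to the constant vector:

import numpy as np

n = 5
v = np.arange(n, dtype=float)
P = np.ones((n, 1)) / np.sqrt(n)         # one query containing all examples
centered = v - (P @ (P.T @ v[:, None])).ravel()
print(np.allclose(centered, v - v.mean()))  # True: (I - P P^T) subtracts the mean
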
Example #26
    def __init__(self,
                 X,
                 Y,
                 regparam=1.0,
                 bias=1.0,
                 callbackfun=None,
                 **kwargs):
        self.Y = array_tools.as_2d_array(Y)
        self.X = csc_matrix(X.T)
        self.bias = bias
        self.regparam = regparam
        if self.bias != 0.:
            bias_slice = sqrt(self.bias) * np.mat(
                ones((1, self.X.shape[1]), dtype=np.float64))
            self.X = sparse.vstack([self.X, bias_slice]).tocsc()
        self.X_csr = self.X.tocsr()
        self.callbackfun = callbackfun
        self.results = {}
        regparam = self.regparam
        Y = self.Y
        X = self.X
        X_csr = self.X_csr

        def mv(v):
            return X.T * (X_csr * v) + regparam * v

        G = LinearOperator((X.shape[1], X.shape[1]),
                           matvec=mv,
                           dtype=np.float64)
        self.AA = []
        if self.callbackfun is not None:

            def cb(v):
                self.A = np.mat(v).T
                self.callbackfun.callback(self)
        else:
            cb = None
        try:
            self.A = np.mat(cg(G, Y, callback=cb)[0]).T
        except Finished:
            pass
        if self.callbackfun is not None:
            self.callbackfun.finished(self)
        self.A = X_csr * self.A
        if self.bias == 0.:
            self.b = np.mat(np.zeros((1, 1)))
        else:
            self.b = sqrt(self.bias) * self.A[-1]
            self.A = self.A[:-1]
        #self.results['predictor'] = self.getModel()
        self.predictor = predictor.LinearPredictor(self.A, self.b)
Example #27
 def __init__(self,
              X_valid,
              Y_valid,
              qids_valid=None,
              measure=sqmprank,
              maxiter=10):
     self.X_valid = array_tools.as_matrix(X_valid)
     self.Y_valid = array_tools.as_2d_array(Y_valid)
     self.qids_valid = qids_to_splits(qids_valid)
     self.measure = measure
     self.bestperf = None
     self.bestA = None
     self.iter = 0
     self.last_update = 0
     self.maxiter = maxiter
Example #28
 def __init__(self, X, Y, qids, regparam = 1.0, kernel='LinearKernel', basis_vectors = None, **kwargs):
     kwargs["bias"] = 0.
     kwargs['kernel'] =  kernel
     kwargs['X'] = X
     if basis_vectors is not None:
         kwargs['basis_vectors'] = basis_vectors
     self.svdad = adapter.createSVDAdapter(**kwargs)
     self.Y = np.mat(array_tools.as_2d_array(Y))
     self.regparam = regparam
     self.svals = np.mat(self.svdad.svals)
     self.svecs = self.svdad.rsvecs
     self.size = self.Y.shape[0]
     self.qids = map_qids(qids)
     self.qidlist = qids_to_splits(self.qids)
     self.solve(self.regparam)
Example #29
 def __init__(self, X, Y, regparam = 1.0, kernel='LinearKernel', basis_vectors = None, **kwargs):
     self.Y = array_tools.as_2d_array(Y)
     if X.shape[0] != Y.shape[0]:
         raise Exception("First dimension of X and Y must be the same")
     if basis_vectors is not None:
         if X.shape[1] != basis_vectors.shape[1]:
             raise Exception("Number of columns for X and basis_vectors must be the same")
     kwargs['X'] = X
     kwargs['kernel'] = kernel
     if basis_vectors is not None:
         kwargs['basis_vectors'] = basis_vectors
     self.svdad = adapter.createSVDAdapter(**kwargs)
     self.regparam = regparam
     self.svals = np.mat(self.svdad.svals)
     self.svecs = np.mat(self.svdad.rsvecs)
     self.size = self.Y.shape[0]
     self.solve(self.regparam)   
Example #30
 def __init__(self, **kwargs):
     Y = kwargs["Y"]
     Y = np.mat(array_tools.as_2d_array(Y))
     if 'K1' in kwargs:
         K1 = np.mat(kwargs['K1'])
         K2 = np.mat(kwargs['K2'])
         Y = Y.reshape((K1.shape[0], K2.shape[0]), order = 'F')
         self.K1, self.K2 = K1, K2
         self.kernelmode = True
     else:
         X1 = np.mat(kwargs['X1'])
         X2 = np.mat(kwargs['X2'])
         Y = Y.reshape((X1.shape[0], X2.shape[0]), order = 'F')
         self.X1, self.X2 = X1, X2
         self.kernelmode = False
     self.Y = Y
     self.regparam1 = kwargs["regparam1"]
     self.regparam2 = kwargs["regparam2"]
     self.trained = False
     self.solve(self.regparam1, self.regparam2)
Example #31
 def __init__(self, X, Y, regparam = 1.0, bias = 1.0, callbackfun = None, **kwargs):
     self.Y = array_tools.as_2d_array(Y)
     self.X = csc_matrix(X.T)
     self.bias = bias
     self.regparam = regparam
     if self.bias != 0.:
         bias_slice = sqrt(self.bias)*np.mat(ones((1,self.X.shape[1]),dtype=np.float64))
         self.X = sparse.vstack([self.X,bias_slice]).tocsc()
     self.X_csr = self.X.tocsr()
     self.callbackfun = callbackfun
     self.results = {}
     regparam = self.regparam
     Y = self.Y
     X = self.X
     X_csr = self.X_csr
     def mv(v):
         return X.T*(X_csr*v)+regparam*v
     G = LinearOperator((X.shape[1],X.shape[1]), matvec=mv, dtype=np.float64)
     self.AA = []
     if not self.callbackfun is None:
         def cb(v):
             self.A = np.mat(v).T
             self.callbackfun.callback(self)
     else:
         cb = None
     try:
         self.A = np.mat(cg(G, Y, callback=cb)[0]).T
     except Finished:
         pass
     if self.callbackfun is not None:
         self.callbackfun.finished(self)
     self.A = X_csr*self.A
     if self.bias == 0.:
         self.b = np.mat(np.zeros((1,1)))
     else:
         self.b = sqrt(self.bias)*self.A[-1]
         self.A = self.A[:-1]
     #self.results['predictor'] = self.getModel()
     self.predictor = predictor.LinearPredictor(self.A, self.b)   
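
A compact sketch of the matrix-free solve used above: (X^T X + regparam*I) a = Y is solved with conjugate gradient through a LinearOperator, and the dual coefficients are mapped back to primal weights with w = X a, just as A = X_csr * self.A does. Hypothetical toy data, plain NumPy/SciPy:

import numpy as np
from scipy.sparse.linalg import LinearOperator, cg

rng = np.random.default_rng(0)
X = rng.standard_normal((5, 30))      # features x examples, like self.X here
y = rng.standard_normal(30)
lam = 1.0

def mv(a):
    return X.T @ (X @ a) + lam * a    # (X^T X + lam I) a without forming X^T X

G = LinearOperator((30, 30), matvec=mv, dtype=np.float64)
a, info = cg(G, y)
w = X @ a                             # back to primal weights
print(info, np.allclose(X.T @ w + lam * a, y, atol=1e-3))  # 0 True
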
Example #32
 def __init__(self, **kwargs):
     Y = kwargs["Y"]
     Y = array_tools.as_2d_array(Y)
     Y = np.mat(Y)
     if 'K1' in kwargs:
         K1 = np.mat(kwargs['K1'])
         K2 = np.mat(kwargs['K2'])
         Y = Y.reshape((K1.shape[0], K2.shape[0]), order='F')
         self.K1, self.K2 = K1, K2
         self.kernelmode = True
     else:
         X1 = np.mat(kwargs['X1'])
         X2 = np.mat(kwargs['X2'])
         Y = Y.reshape((X1.shape[0], X2.shape[0]), order='F')
         self.X1, self.X2 = X1, X2
         self.kernelmode = False
     self.Y = Y
     if "regparam" in kwargs:
         self.regparam = kwargs["regparam"]
     else:
         self.regparam = 1.
     self.trained = False
     self.solve(self.regparam)
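
The reshape of Y with order='F' above matches the vec convention of the Kronecker identity these pairwise solvers rely on, (X2 kron X1) vec(V) = vec(X1 V X2^T). A quick standalone NumPy check of the identity:

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((3, 4))
B = rng.standard_normal((2, 5))
V = rng.standard_normal((4, 5))

lhs = np.kron(B, A) @ V.ravel(order='F')       # explicit Kronecker product
rhs = (A @ V @ B.T).ravel(order='F')           # the cheap equivalent
print(np.allclose(lhs, rhs))  # True
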
Example #33
 def __init__(self, **kwargs):
     Y = kwargs["Y"]
     Y = array_tools.as_2d_array(Y)
     Y = np.mat(Y)
     if kwargs.has_key("K1"):
         K1 = np.mat(kwargs["K1"])
         K2 = np.mat(kwargs["K2"])
         Y = Y.reshape((K1.shape[0], K2.shape[0]), order="F")
         self.K1, self.K2 = K1, K2
         self.kernelmode = True
     else:
         X1 = np.mat(kwargs["X1"])
         X2 = np.mat(kwargs["X2"])
         Y = Y.reshape((X1.shape[0], X2.shape[0]), order="F")
         self.X1, self.X2 = X1, X2
         self.kernelmode = False
     self.Y = Y
     if kwargs.has_key("regparam"):
         self.regparam = kwargs["regparam"]
     else:
         self.regparam = 1.0
     self.trained = False
     self.solve(self.regparam)
Example #34
 def __init__(self,
              X,
              Y,
              qids,
              regparam=1.0,
              kernel='LinearKernel',
              basis_vectors=None,
              **kwargs):
     kwargs["bias"] = 0.
     kwargs['kernel'] = kernel
     kwargs['X'] = X
     if basis_vectors is not None:
         kwargs['basis_vectors'] = basis_vectors
     self.svdad = adapter.createSVDAdapter(**kwargs)
     self.Y = np.mat(array_tools.as_2d_array(Y))
     self.regparam = regparam
     self.svals = np.mat(self.svdad.svals)
     self.svecs = self.svdad.rsvecs
     self.size = self.Y.shape[0]
     self.qids = map_qids(qids)
     self.qidlist = qids_to_splits(self.qids)
     self.solve(self.regparam)
Example #35
 def __init__(self, X, Y, subsetsize, regparam = 1.0, bias=1.0, callbackfun=None, **kwargs):
     self.callbackfun = callbackfun
     self.regparam = regparam
     if isinstance(X, sp.base.spmatrix):
         self.X = X.todense()
     else:
         self.X = X
     self.X = self.X.T
     self.X = self.X.astype("float64", copy=False)
     self.Y = np.mat(array_tools.as_2d_array(Y))
     #Number of training examples
     self.size = self.Y.shape[0]
     self.bias = bias
     self.measure = None
     fsize = X.shape[1]
     self.desiredfcount = subsetsize
     if not fsize >= self.desiredfcount:
         raise Exception('The overall number of features ' + str(fsize) + ' is smaller than the desired number ' + str(self.desiredfcount) + ' of features to be selected.')
     self.results = {}
     if 'use_default_callback' in kwargs and bool(kwargs['use_default_callback']):
         self.callbackfun = DefaultCallback(**kwargs)
     #The current version works only with the squared error measure
     self._solve_cython(self.regparam)
Example #36
 def __init__(self,
              X,
              Y,
              subsetsize,
              regparam=1.0,
              bias=1.0,
              measure=None,
              callbackfun=None,
              **kwargs):
     self.callbackfun = callbackfun
     self.regparam = regparam
     if isinstance(X, sp.base.spmatrix):
         self.X = X.todense()
     else:
         self.X = X
     self.X = self.X.T
     self.Y = array_tools.as_2d_array(Y)
     #Number of training examples
     self.size = self.Y.shape[0]
     #if not self.Y.shape[1] == 1:
     #    raise Exception('GreedyRLS currently supports only one output at a time. The output matrix is now of shape ' + str(self.Y.shape) + '.')
     self.bias = bias
     self.measure = measure
     fsize = X.shape[1]
     self.desiredfcount = subsetsize
     if not fsize >= self.desiredfcount:
         raise Exception('The overall number of features ' + str(fsize) +
                         ' is smaller than the desired number ' +
                         str(self.desiredfcount) +
                         ' of features to be selected.')
     self.results = {}
     ##The current version works only with the squared error measure
     #self.measure = None
     #self.solve_bu(self.regparam)
     #return
     #if not self.Y.shape[1] == 1:
     self.solve_weak(self.regparam)
Example #37
 def getKM(self, X):
     """Returns the kernel matrix between the basis vectors and X.
     
     Parameters
     ----------
     X : {array-like, sparse matrix}, shape = [n_samples, n_features]
     
     Returns
     -------
     K : array, shape = [n_samples, n_bvectors]
         kernel matrix
     """
     X = array_tools.as_2d_array(X, True)
     test_X = X
     if sp.issparse(test_X):
         test_X = array_tools.spmat_resize(test_X, self.train_X.shape[1])
     else:
         test_X = array_tools.as_dense_matrix(test_X)
     train_X = self.train_X
     K = train_X * test_X.T
     K = array_tools.as_array(K)
     if self.bias != 0:
         K += self.bias
     return K.T
Example #38
    def __init__(self, **kwargs):
        Y = kwargs["Y"]
        self.input1_inds = np.array(kwargs["label_row_inds"], dtype=np.int32)
        self.input2_inds = np.array(kwargs["label_col_inds"], dtype=np.int32)
        Y = array_tools.as_2d_array(Y)
        self.Y = np.mat(Y)
        self.trained = False
        if "regparam" in kwargs:
            self.regparam = kwargs["regparam"]
        else:
            self.regparam = 0.
        if CALLBACK_FUNCTION in kwargs:
            self.callbackfun = kwargs[CALLBACK_FUNCTION]
        else:
            self.callbackfun = None
        if "compute_risk" in kwargs:
            self.compute_risk = kwargs["compute_risk"]
        else:
            self.compute_risk = False

        regparam = self.regparam
        if 'K1' in kwargs:

            K1 = kwargs['K1']
            K2 = kwargs['K2']

            if 'maxiter' in kwargs: maxiter = int(kwargs['maxiter'])
            else: maxiter = None

            Y = np.array(self.Y).ravel(order='F')
            self.bestloss = float("inf")

            def mv(v):
                return sampled_kronecker_products.sampled_vec_trick(
                    v, K2, K1, self.input2_inds, self.input1_inds,
                    self.input2_inds, self.input1_inds) + regparam * v

            def mv_mk(v):
                vsum = regparam * v
                for i in range(len(K1)):
                    K1i = K1[i]
                    K2i = K2[i]
                    inds2 = self.input2_inds[i]
                    inds1 = self.input1_inds[i]
                    vsum += weights[
                        i] * sampled_kronecker_products.sampled_vec_trick(
                            v, K2i, K1i, inds2, inds1, inds2, inds1)
                return vsum

            def mvr(v):
                raise Exception('You should not be here!')

            def cgcb(v):
                if self.compute_risk:
                    P = sampled_kronecker_products.sampled_vec_trick(
                        v, K2, K1, self.input2_inds, self.input1_inds,
                        self.input2_inds, self.input1_inds)
                    z = (Y - P)
                    Ka = sampled_kronecker_products.sampled_vec_trick(
                        v, K2, K1, self.input2_inds, self.input1_inds,
                        self.input2_inds, self.input1_inds)
                    loss = (np.dot(z, z) + regparam * np.dot(v, Ka))
                    print("loss", 0.5 * loss)
                    if loss < self.bestloss:
                        self.A = v.copy()
                        self.bestloss = loss
                else:
                    self.A = v
                if not self.callbackfun is None:
                    self.predictor = KernelPairwisePredictor(
                        self.A, self.input1_inds, self.input2_inds)
                    self.callbackfun.callback(self)

            if isinstance(K1, (list, tuple)):
                if 'weights' in kwargs: weights = kwargs['weights']
                else: weights = np.ones((len(K1)))
                G = LinearOperator(
                    (len(self.input1_inds[0]), len(self.input1_inds[0])),
                    matvec=mv_mk,
                    rmatvec=mvr,
                    dtype=np.float64)
            else:
                weights = None
                G = LinearOperator(
                    (len(self.input1_inds), len(self.input1_inds)),
                    matvec=mv,
                    rmatvec=mvr,
                    dtype=np.float64)
            self.A = minres(G,
                            self.Y,
                            maxiter=maxiter,
                            callback=cgcb,
                            tol=1e-20)[0]
            self.predictor = KernelPairwisePredictor(self.A, self.input1_inds,
                                                     self.input2_inds, weights)
        else:
            X1 = kwargs['X1']
            X2 = kwargs['X2']
            self.X1, self.X2 = X1, X2

            if 'maxiter' in kwargs: maxiter = int(kwargs['maxiter'])
            else: maxiter = None

            if isinstance(X1, (list, tuple)):
                raise NotImplementedError(
                    "Got list or tuple as X1 but multiple kernel learning has not been implemented for the proal case yet."
                )
                x1tsize, x1fsize = X1[0].shape  #m, d
                x2tsize, x2fsize = X2[0].shape  #q, r
            else:
                x1tsize, x1fsize = X1.shape  #m, d
                x2tsize, x2fsize = X2.shape  #q, r

            kronfcount = x1fsize * x2fsize

            Y = np.array(self.Y).ravel(order='F')
            self.bestloss = float("inf")

            def mv(v):
                v_after = sampled_kronecker_products.sampled_vec_trick(
                    v, X2, X1, self.input2_inds, self.input1_inds)
                v_after = sampled_kronecker_products.sampled_vec_trick(
                    v_after, X2.T, X1.T, None, None, self.input2_inds,
                    self.input1_inds) + regparam * v
                return v_after

            def mv_mk(v):
                vsum = regparam * v
                for i in range(len(X1)):
                    X1i = X1[i]
                    X2i = X2[i]
                    v_after = sampled_kronecker_products.sampled_vec_trick(
                        v, X2i, X1i, self.input2_inds, self.input1_inds)
                    v_after = sampled_kronecker_products.sampled_vec_trick(
                        v_after, X2i.T, X1i.T, None, None, self.input2_inds,
                        self.input1_inds)
                    vsum = vsum + v_after
                return vsum

            def mvr(v):
                raise Exception('You should not be here!')
                return None

            def cgcb(v):
                if self.compute_risk:
                    P = sampled_kronecker_products.sampled_vec_trick(
                        v, X2, X1, self.input2_inds, self.input1_inds)
                    z = (Y - P)
                    loss = (np.dot(z, z) + regparam * np.dot(v, v))
                    if loss < self.bestloss:
                        self.W = v.copy().reshape((x1fsize, x2fsize),
                                                  order='F')
                        self.bestloss = loss
                else:
                    self.W = v.reshape((x1fsize, x2fsize), order='F')
                if not self.callbackfun is None:
                    self.predictor = LinearPairwisePredictor(self.W)
                    self.callbackfun.callback(self)

            if isinstance(X1, (list, tuple)):
                G = LinearOperator((kronfcount, kronfcount),
                                   matvec=mv_mk,
                                   rmatvec=mvr,
                                   dtype=np.float64)
                vsum = np.zeros(kronfcount)
                v_init = np.array(self.Y).reshape(self.Y.shape[0])
                for i in range(len(X1)):
                    X1i = X1[i]
                    X2i = X2[i]
                    vsum += sampled_kronecker_products.sampled_vec_trick(
                        v_init, X2i.T, X1i.T, None, None, self.input2_inds,
                        self.input1_inds)
                v_init = vsum
            else:
                G = LinearOperator((kronfcount, kronfcount),
                                   matvec=mv,
                                   rmatvec=mvr,
                                   dtype=np.float64)
                v_init = np.array(self.Y).reshape(self.Y.shape[0])
                v_init = sampled_kronecker_products.sampled_vec_trick(
                    v_init, X2.T, X1.T, None, None, self.input2_inds,
                    self.input1_inds)

            v_init = np.array(v_init).reshape(kronfcount)
            if 'warm_start' in kwargs:
                x0 = np.array(kwargs['warm_start']).reshape(kronfcount,
                                                            order='F')
            else:
                x0 = None
            self.W = minres(G, v_init, x0=x0, maxiter=maxiter, callback=cgcb,
                            tol=1e-20)[0].reshape((x1fsize, x2fsize), order='F')
            self.predictor = LinearPairwisePredictor(self.W)
            if not self.callbackfun is None:
                self.callbackfun.finished(self)
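
A dense reference sketch of what the sampled Kronecker products compute (illustrative only; the real sampled_vec_trick has a different argument layout and never forms the Kronecker matrix): entries of X1 W X2^T at the observed (row, column) index pairs.

import numpy as np

rng = np.random.default_rng(0)
X1 = rng.standard_normal((4, 3))      # domain 1: 4 objects, 3 features
X2 = rng.standard_normal((5, 2))      # domain 2: 5 objects, 2 features
W = rng.standard_normal((3, 2))       # pairwise model weights

rows = np.array([0, 2, 3])            # sampled domain-1 indices
cols = np.array([1, 1, 4])            # sampled domain-2 indices

direct = (X1 @ W @ X2.T)[rows, cols]              # full matrix, then sample
kron = np.kron(X2, X1) @ W.ravel(order='F')       # explicit Kronecker route
kron = kron.reshape((4, 5), order='F')[rows, cols]
print(np.allclose(direct, kron))  # True
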
Example #39
    def __init__(self,
                 X,
                 Y,
                 regparam=1.0,
                 qids=None,
                 callbackfun=None,
                 **kwargs):
        self.regparam = regparam
        self.callbackfun = None
        self.Y = array_tools.as_2d_array(Y)
        #Number of training examples
        self.size = Y.shape[0]
        if self.Y.shape[1] > 1:
            raise Exception(
                'CGRankRLS does not currently work in multi-label mode')
        self.learn_from_labels = True
        self.callbackfun = callbackfun
        self.X = csc_matrix(X.T)
        if qids is not None:
            self.qids = map_qids(qids)
            self.splits = qids_to_splits(self.qids)
        else:
            self.qids = None
        regparam = self.regparam
        qids = self.qids
        if qids is not None:
            P = sp.lil_matrix((self.size, len(set(qids))))
            for qidind in range(len(self.splits)):
                inds = self.splits[qidind]
                qsize = len(inds)
                for i in inds:
                    P[i, qidind] = 1. / sqrt(qsize)
            P = P.tocsr()
            PT = P.tocsc().T
        else:
            P = 1. / sqrt(self.size) * (np.mat(
                np.ones((self.size, 1), dtype=np.float64)))
            PT = P.T
        X = self.X.tocsc()
        X_csr = X.tocsr()

        def mv(v):
            v = np.mat(v).T
            return X_csr * (X.T * v) - X_csr * (P * (PT *
                                                     (X.T * v))) + regparam * v

        G = LinearOperator((X.shape[0], X.shape[0]),
                           matvec=mv,
                           dtype=np.float64)
        Y = self.Y
        if not self.callbackfun is None:

            def cb(v):
                self.A = np.mat(v).T
                self.b = np.mat(np.zeros((1, 1)))
                self.callbackfun.callback(self)
        else:
            cb = None
        XLY = X_csr * Y - X_csr * (P * (PT * Y))
        try:
            self.A = np.mat(cg(G, XLY, callback=cb)[0]).T
        except Finished:
            pass
        self.b = np.mat(np.zeros((1, 1)))
        self.predictor = predictor.LinearPredictor(self.A, self.b)
Example #40
    def __init__(self, **kwargs):
        self.resource_pool = kwargs
        Y = kwargs["Y"]
        self.input1_inds = np.array(kwargs["label_row_inds"], dtype=np.int32)
        self.input2_inds = np.array(kwargs["label_col_inds"], dtype=np.int32)
        Y = array_tools.as_2d_array(Y)
        self.Y = np.mat(Y)
        self.trained = False
        if kwargs.has_key("regparam"):
            self.regparam = kwargs["regparam"]
        else:
            self.regparam = 0.0
        if CALLBACK_FUNCTION in kwargs:
            self.callbackfun = kwargs[CALLBACK_FUNCTION]
        else:
            self.callbackfun = None
        if kwargs.has_key("compute_risk"):
            self.compute_risk = kwargs["compute_risk"]
        else:
            self.compute_risk = False

        regparam = self.regparam
        if "K1" in self.resource_pool:

            K1 = self.resource_pool["K1"]
            K2 = self.resource_pool["K2"]

            if "maxiter" in self.resource_pool:
                maxiter = int(self.resource_pool["maxiter"])
            else:
                maxiter = None

            Y = np.array(self.Y).ravel(order="F")
            self.bestloss = float("inf")

            def mv(v):
                return (
                    sampled_kronecker_products.sampled_vec_trick(
                        v, K2, K1, self.input2_inds, self.input1_inds, self.input2_inds, self.input1_inds
                    )
                    + regparam * v
                )

            def mvr(v):
                raise Exception("You should not be here!")

            def cgcb(v):
                if self.compute_risk:
                    P = sampled_kronecker_products.sampled_vec_trick(
                        v, K2, K1, self.input2_inds, self.input1_inds, self.input2_inds, self.input1_inds
                    )
                    z = Y - P
                    Ka = sampled_kronecker_products.sampled_vec_trick(
                        v, K2, K1, self.input2_inds, self.input1_inds, self.input2_inds, self.input1_inds
                    )
                    loss = np.dot(z, z) + regparam * np.dot(v, Ka)
                    print "loss", 0.5 * loss
                    if loss < self.bestloss:
                        self.A = v.copy()
                        self.bestloss = loss
                else:
                    self.A = v
                if not self.callbackfun is None:
                    self.predictor = KernelPairwisePredictor(self.A, self.input1_inds, self.input2_inds)
                    self.callbackfun.callback(self)

            G = LinearOperator((len(self.input1_inds), len(self.input1_inds)), matvec=mv, rmatvec=mvr, dtype=np.float64)
            self.A = minres(G, self.Y, maxiter=maxiter, callback=cgcb, tol=1e-20)[0]
            self.predictor = KernelPairwisePredictor(self.A, self.input1_inds, self.input2_inds)
        else:
            X1 = self.resource_pool["X1"]
            X2 = self.resource_pool["X2"]
            self.X1, self.X2 = X1, X2

            if "maxiter" in self.resource_pool:
                maxiter = int(self.resource_pool["maxiter"])
            else:
                maxiter = None

            x1tsize, x1fsize = X1.shape  # m, d
            x2tsize, x2fsize = X2.shape  # q, r

            kronfcount = x1fsize * x2fsize

            Y = np.array(self.Y).ravel(order="F")
            self.bestloss = float("inf")

            def mv(v):
                v_after = sampled_kronecker_products.sampled_vec_trick(v, X2, X1, self.input2_inds, self.input1_inds)
                v_after = (
                    sampled_kronecker_products.sampled_vec_trick(
                        v_after, X2.T, X1.T, None, None, self.input2_inds, self.input1_inds
                    )
                    + regparam * v
                )
                return v_after

            def mvr(v):
                raise Exception("You should not be here!")
                return None

            def cgcb(v):
                if self.compute_risk:
                    P = sampled_kronecker_products.sampled_vec_trick(v, X2, X1, self.input2_inds, self.input1_inds)
                    z = Y - P
                    loss = np.dot(z, z) + regparam * np.dot(v, v)
                    if loss < self.bestloss:
                        self.W = v.copy().reshape((x1fsize, x2fsize), order="F")
                        self.bestloss = loss
                else:
                    self.W = v.reshape((x1fsize, x2fsize), order="F")
                if not self.callbackfun is None:
                    self.predictor = LinearPairwisePredictor(self.W)
                    self.callbackfun.callback(self)

            G = LinearOperator((kronfcount, kronfcount), matvec=mv, rmatvec=mvr, dtype=np.float64)

            v_init = np.array(self.Y).reshape(self.Y.shape[0])
            v_init = sampled_kronecker_products.sampled_vec_trick(
                v_init, X2.T, X1.T, None, None, self.input2_inds, self.input1_inds
            )
            v_init = np.array(v_init).reshape(kronfcount)
            if self.resource_pool.has_key("warm_start"):
                x0 = np.array(self.resource_pool["warm_start"]).reshape(kronfcount, order="F")
            else:
                x0 = None
            minres(G, v_init, x0=x0, maxiter=maxiter, callback=cgcb, tol=1e-20)[0].reshape(
                (x1fsize, x2fsize), order="F"
            )
            self.predictor = LinearPairwisePredictor(self.W)
            if not self.callbackfun is None:
                self.callbackfun.finished(self)
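Both branches avoid ever materializing the Kronecker product: sampled_vec_trick exploits the identity (X2 (x) X1) * vec(W) = vec(X1 * W * X2.T), restricted to the observed pairs. A dense numpy sketch of the underlying identity, without the sampling:

import numpy as np

#Dense sanity check of the vec trick: multiplying by the Kronecker
#product X2 (x) X1 is equivalent to the cheap product X1 * W * X2.T,
#when vec() stacks columns (Fortran order).
rng = np.random.RandomState(0)
X1 = rng.randn(5, 3)   #m x d
X2 = rng.randn(4, 2)   #q x r
W = rng.randn(3, 2)    #d x r

lhs = np.kron(X2, X1).dot(W.ravel(order="F"))
rhs = X1.dot(W).dot(X2.T).ravel(order="F")
assert np.allclose(lhs, rhs)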
Exemplo n.º 48
    def __init__(self, **kwargs):
        self.resource_pool = kwargs
        Y = kwargs["Y"]
        self.input1_inds = np.array(kwargs["label_row_inds"], dtype=np.int32)
        self.input2_inds = np.array(kwargs["label_col_inds"], dtype=np.int32)
        Y = array_tools.as_2d_array(Y)
        self.Y = np.mat(Y)
        self.trained = False
        if "regparam" in kwargs:
            self.regparam = kwargs["regparam"]
        else:
            self.regparam = 0.
        if CALLBACK_FUNCTION in kwargs:
            self.callbackfun = kwargs[CALLBACK_FUNCTION]
        else:
            self.callbackfun = None
        if "compute_risk" in kwargs:
            self.compute_risk = kwargs["compute_risk"]
        else:
            self.compute_risk = False

        regparam = self.regparam
        if 'K1' in self.resource_pool:

            K1 = self.resource_pool['K1']
            K2 = self.resource_pool['K2']

            if 'maxiter' in self.resource_pool:
                maxiter = int(self.resource_pool['maxiter'])
            else:
                maxiter = None

            Y = np.array(self.Y).ravel(order='F')
            self.bestloss = float("inf")

            def mv(v):
                return sampled_kronecker_products.sampled_vec_trick(v, K2, K1, self.input2_inds, self.input1_inds, self.input2_inds, self.input1_inds) + regparam * v

            def mvr(v):
                raise Exception('You should not be here!')

            def cgcb(v):
                if self.compute_risk:
                    P = sampled_kronecker_products.sampled_vec_trick(v, K2, K1, self.input2_inds, self.input1_inds, self.input2_inds, self.input1_inds)
                    z = (Y - P)
                    #P equals K * v, so it doubles as the regularizer term
                    loss = (np.dot(z, z) + regparam * np.dot(v, P))
                    print("loss", 0.5 * loss)
                    if loss < self.bestloss:
                        self.A = v.copy()
                        self.bestloss = loss
                else:
                    self.A = v
                if self.callbackfun is not None:
                    self.predictor = KernelPairwisePredictor(self.A, self.input1_inds, self.input2_inds)
                    self.callbackfun.callback(self)

            G = LinearOperator((len(self.input1_inds), len(self.input1_inds)), matvec=mv, rmatvec=mvr, dtype=np.float64)
            #cgcb stores the dual coefficients in self.A; the return value
            #of minres is not used directly
            minres(G, Y, maxiter=maxiter, callback=cgcb, tol=1e-20)
            self.predictor = KernelPairwisePredictor(self.A, self.input1_inds, self.input2_inds)
        else:
            X1 = self.resource_pool['X1']
            X2 = self.resource_pool['X2']
            self.X1, self.X2 = X1, X2

            if 'maxiter' in self.resource_pool:
                maxiter = int(self.resource_pool['maxiter'])
            else:
                maxiter = None

            x1tsize, x1fsize = X1.shape  #m, d
            x2tsize, x2fsize = X2.shape  #q, r

            kronfcount = x1fsize * x2fsize

            Y = np.array(self.Y).ravel(order='F')
            self.bestloss = float("inf")

            def mv(v):
                v_after = sampled_kronecker_products.sampled_vec_trick(v, X2, X1, self.input2_inds, self.input1_inds)
                v_after = sampled_kronecker_products.sampled_vec_trick(v_after, X2.T, X1.T, None, None, self.input2_inds, self.input1_inds) + regparam * v
                return v_after

            def mvr(v):
                raise Exception('You should not be here!')

            def cgcb(v):
                if self.compute_risk:
                    P = sampled_kronecker_products.sampled_vec_trick(v, X2, X1, self.input2_inds, self.input1_inds)
                    z = (Y - P)
                    loss = (np.dot(z, z) + regparam * np.dot(v, v))
                    if loss < self.bestloss:
                        self.W = v.copy().reshape((x1fsize, x2fsize), order='F')
                        self.bestloss = loss
                else:
                    self.W = v.reshape((x1fsize, x2fsize), order='F')
                if self.callbackfun is not None:
                    self.predictor = LinearPairwisePredictor(self.W)
                    self.callbackfun.callback(self)

            G = LinearOperator((kronfcount, kronfcount), matvec=mv, rmatvec=mvr, dtype=np.float64)

            v_init = np.array(self.Y).reshape(self.Y.shape[0])
            v_init = sampled_kronecker_products.sampled_vec_trick(v_init, X2.T, X1.T, None, None, self.input2_inds, self.input1_inds)
            v_init = np.array(v_init).reshape(kronfcount)
            if 'warm_start' in self.resource_pool:
                x0 = np.array(self.resource_pool['warm_start']).reshape(kronfcount, order='F')
            else:
                x0 = None
            #cgcb stores the coefficient matrix in self.W; the return value
            #of minres is not used directly
            minres(G, v_init, x0=x0, maxiter=maxiter, callback=cgcb, tol=1e-20)
            self.predictor = LinearPairwisePredictor(self.W)
            if self.callbackfun is not None:
                self.callbackfun.finished(self)
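The linear branch reads an optional warm_start entry from the resource pool and passes it to minres as the initial iterate x0. A hedged sketch of exploiting this along a regularization path; the class name CGKronRLS and the keyword names are assumptions read off the constructor above:

import numpy as np

#A sketch of warm-starting along a regularization path, assuming the
#constructor above belongs to a class named CGKronRLS; all data below
#is synthetic.
rng = np.random.RandomState(0)
X1 = rng.randn(50, 10)           #first-domain objects
X2 = rng.randn(40, 8)            #second-domain objects
rows = rng.randint(0, 50, 300)   #row indices of the observed pairs
cols = rng.randint(0, 40, 300)   #column indices of the observed pairs
Y = rng.randn(300)

W_prev = None
for regparam in [10.0, 1.0, 0.1]:
    params = dict(Y=Y, label_row_inds=rows, label_col_inds=cols,
                  X1=X1, X2=X2, regparam=regparam, maxiter=100)
    if W_prev is not None:
        params["warm_start"] = W_prev  #seed minres with the last solution
    learner = CGKronRLS(**params)
    W_prev = learner.W               #d x r coefficient matrix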
Exemplo n.º 49
    def __init__(self, X, bias=1.0):
        #Store the training data and the bias term for later
        #kernel matrix evaluations
        X = array_tools.as_2d_array(X, True)
        self.train_X = X
        self.bias = bias
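The constructor only stores the data; the kernel matrix itself would be computed on demand. A minimal self-contained sketch of a linear kernel with the same constructor, adding a getKM method in the style of the kernel interface seen earlier in these examples (the class name LinearKernel is an assumption):

import numpy as np

class LinearKernel(object):
    """A minimal sketch; the class name and getKM method are assumed."""

    def __init__(self, X, bias=1.0):
        self.train_X = np.asarray(X)
        self.bias = bias

    def getKM(self, X):
        #K[i, j] = <X[i], train_X[j]> + bias
        return np.asarray(X).dot(self.train_X.T) + self.bias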
Exemplo n.º 50
0
    def __init__(self, X, degree=2, gamma=1.0, coef0=0):
        #Store the training data and the polynomial kernel
        #parameters for later kernel matrix evaluations
        X = array_tools.as_2d_array(X, True)
        self.train_X = X
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
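The stored parameters correspond to the standard polynomial kernel k(x, z) = (gamma * &lt;x, z&gt; + coef0) ** degree. A matching self-contained sketch (the class name PolynomialKernel and the getKM method are assumptions):

import numpy as np

class PolynomialKernel(object):
    """A minimal sketch; the class name and getKM method are assumed."""

    def __init__(self, X, degree=2, gamma=1.0, coef0=0):
        self.train_X = np.asarray(X)
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0

    def getKM(self, X):
        #K[i, j] = (gamma * <X[i], train_X[j]> + coef0) ** degree
        return (self.gamma * np.asarray(X).dot(self.train_X.T)
                + self.coef0) ** self.degree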