示例#1
0
def cindex(Y, P):
    """Average the per-task scores returned by ``cindex_multitask``.

    Both arguments are converted with ``array_tools.as_labelmatrix``
    before scoring. Scores that are NaN are dropped before the mean is
    taken.

    Parameters
    ----------
    Y: {array-like}
        Correct values.
    P: {array-like}
        Predicted values.

    Returns
    -------
    cindex: float
        ``np.mean`` of the non-NaN scores. No check is made for the case
        where every score is NaN.
    """
    correct = array_tools.as_labelmatrix(Y)
    predicted = array_tools.as_labelmatrix(P)
    scores = np.array(cindex_multitask(correct, predicted))
    # Keep only the entries for which a score is defined.
    defined = ~np.isnan(scores)
    return np.mean(scores[defined])
示例#2
0
def disagreement(Y, P):
    """Pairwise ranking error (disagreement error).

    Ranking performance measure based on the pairwise disagreements
    between the correct and the predicted ranking. The implementation
    runs in O(n^2) time and can therefore be slow on large problems (a
    loglinear variant based on search trees would be possible). With
    query-structured data one would normally evaluate each query
    separately and average the results.

    When 2-dimensional arrays are given, the disagreement is computed
    column by column and the column results are averaged. Columns whose
    result is NaN are left out of the average.

    Parameters
    ----------
    Y: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct utility values, any real numbers.
    P: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted utility values, any real numbers.

    Returns
    -------
    disagreement: float
        number between 0 and 1

    Raises
    ------
    UndefinedPerformance
        If no column yields a defined (non-NaN) result.
    """
    correct = array_tools.as_labelmatrix(Y)
    predicted = array_tools.as_labelmatrix(P)
    column_errors = np.array(disagreement_multitask(correct, predicted))
    defined = column_errors[~np.isnan(column_errors)]
    if not len(defined):
        raise UndefinedPerformance("No pairs, all the instances have the same label")
    return np.mean(defined)
示例#3
0
def accuracy(Y, P):
    """Accuracy for binary classification.

    Gives the fraction of correct class predictions. A prediction with
    P[i]>0 counts as the positive class and P[i]<0 as the negative
    class. P[i]==0 is treated as the classifier abstaining, which costs
    0.5 errors (a correct prediction costs 0 and an incorrect one 1).

    When 2-dimensional arrays are given, the accuracy is computed column
    by column and the column accuracies are averaged.

    Parameters
    ----------
    Y: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, must belong to set {-1,1}
    P: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, any real numbers.

    Returns
    -------
    accuracy: float
        number between 0 and 1
    """
    correct = array_tools.as_labelmatrix(Y)
    predicted = array_tools.as_labelmatrix(P)
    column_accuracies = accuracy_multitask(correct, predicted)
    return np.mean(column_accuracies)
示例#4
0
def fscore(Y, P):
    """F1-score for binary classification.

    F1 = 2*(Precision*Recall)/(Precision+Recall)

    When 2-dimensional arrays are given, the result is the
    macro-averaged F-score over the columns.

    Parameters
    ----------
    Y: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, must belong to set {-1,1}
    P: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, any real numbers. P[i]>0 counts as a positive
        and P[i]<=0 as a negative class prediction.

    Returns
    -------
    fscore: float
        number between 0 and 1
    """
    correct = array_tools.as_labelmatrix(Y)
    predicted = array_tools.as_labelmatrix(P)
    column_scores = fscore_multitask(correct, predicted)
    return np.mean(column_scores)
示例#5
0
def cindex(Y, P):
    """Mean of the non-NaN scores produced by ``cindex_multitask``.

    ``Y`` and ``P`` are first converted with
    ``array_tools.as_labelmatrix``.

    Parameters
    ----------
    Y: {array-like}
        Correct values.
    P: {array-like}
        Predicted values.

    Returns
    -------
    cindex: float
        ``np.mean`` over the scores that are not NaN. The all-NaN case
        is not handled specially.
    """
    y_matrix = array_tools.as_labelmatrix(Y)
    p_matrix = array_tools.as_labelmatrix(P)
    task_scores = np.array(cindex_multitask(y_matrix, p_matrix))
    # Mask out tasks for which the score came back as NaN.
    is_defined = ~np.isnan(task_scores)
    return np.mean(task_scores[is_defined])
示例#6
0
def fscore(Y, P):
    """Compute the F1-score, a measure for binary classification.

    F1 = 2*(Precision*Recall)/(Precision+Recall)

    For 2-dimensional input the F-score is macro-averaged over the
    columns.

    Parameters
    ----------
    Y: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, must belong to set {-1,1}
    P: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, any real numbers. A value P[i]>0 is a positive
        class prediction, P[i]<=0 a negative one.

    Returns
    -------
    fscore: float
        number between 0 and 1
    """
    y_matrix = array_tools.as_labelmatrix(Y)
    p_matrix = array_tools.as_labelmatrix(P)
    per_column = fscore_multitask(y_matrix, p_matrix)
    return np.mean(per_column)
示例#7
0
def cindex(Y, P):
    """Average the defined per-task scores from ``cindex_multitask``.

    Both inputs go through ``array_tools.as_labelmatrix`` first. NaN
    scores are discarded before averaging.

    Parameters
    ----------
    Y: {array-like}
        Correct values.
    P: {array-like}
        Predicted values.

    Returns
    -------
    cindex: float
        Mean of the non-NaN scores.

    Raises
    ------
    UndefinedPerformance
        If every score is NaN.
    """
    correct = array_tools.as_labelmatrix(Y)
    predicted = array_tools.as_labelmatrix(P)
    scores = np.array(cindex_multitask(correct, predicted))
    scores = scores[~np.isnan(scores)]
    if not len(scores):
        raise UndefinedPerformance("No pairs, all the instances have the same output")
    return np.mean(scores)
示例#8
0
def cindex(Y, P):
    """Mean over the non-NaN results of ``cindex_multitask``.

    ``Y`` and ``P`` are converted with ``array_tools.as_labelmatrix``
    before being scored.

    Parameters
    ----------
    Y: {array-like}
        Correct values.
    P: {array-like}
        Predicted values.

    Returns
    -------
    cindex: float
        Mean of the scores that are not NaN.

    Raises
    ------
    UndefinedPerformance
        If no score remains after dropping NaN values.
    """
    y_matrix = array_tools.as_labelmatrix(Y)
    p_matrix = array_tools.as_labelmatrix(P)
    task_scores = np.array(cindex_multitask(y_matrix, p_matrix))
    usable = task_scores[~np.isnan(task_scores)]
    if not len(usable):
        message = "No pairs, all the instances have the same output"
        raise UndefinedPerformance(message)
    return np.mean(usable)
示例#9
0
文件: rls.py 项目: peknau/RLScore
 def __init__(self, svdad, train_labels, regparam=1.0):
     """Store the SVD adapter, the training labels and ``regparam``.

     ``train_labels`` is converted with ``array_tools.as_labelmatrix``.
     ``svals`` and ``svecs`` are read from ``svdad.svals`` and
     ``svdad.rsvecs``; ``results`` starts out as an empty dict.
     """
     self.svdad = svdad
     self.Y = array_tools.as_labelmatrix(train_labels)
     self.regparam = regparam
     # Both decomposition attributes come straight from the adapter.
     self.svals, self.svecs = svdad.svals, svdad.rsvecs
     self.results = dict()
示例#10
0
文件: mmc.py 项目: peknau/RLScore
 def __init__(self, svdad, number_of_clusters=2, regparam=1.0, train_labels = None, fixed_indices=None, callback_obj=None):
     """Set up the label matrix and the per-class bookkeeping.

     Parameters
     ----------
     svdad: object
         Adapter exposing ``svals`` and ``rsvecs``.
     number_of_clusters: int or None
         Number of label columns to generate when ``train_labels`` is
         not given; ``None`` falls back to 2.
     regparam: float
         Stored as ``self.regparam``.
     train_labels: {array-like} or None
         Initial labels. When given, they are converted with
         ``array_tools.as_labelmatrix``; a single column is expanded to
         two columns (the column and its negation). Each row is then
         rewritten so that its first largest entry is 1 and all others
         are -1. When omitted, labels come from ``RandomLabelSource``.
     fixed_indices: sequence or None
         Stored as ``self.fixedindices`` (empty list when ``None``).
     callback_obj: object or None
         Stored as ``self.callbackfun``.
     """
     self.svdad = svdad
     self.regparam = regparam
     self.svals = svdad.svals
     self.svecs = svdad.rsvecs
     self.constraint = 0
     self.labelcount = number_of_clusters
     self.oneclass = bool(self.labelcount == 2)
     self.callbackfun = callback_obj
     # Identity test on purpose: "!= None" is evaluated elementwise on
     # numpy arrays and cannot be used as an if-condition.
     if train_labels is not None:
         Y_orig = array_tools.as_labelmatrix(train_labels)
         if Y_orig.shape[1] == 1:
             self.Y = mat(zeros((Y_orig.shape[0], 2)))
             self.Y[:, 0] = Y_orig
             self.Y[:, 1] = - Y_orig
             self.oneclass = True
         else:
             self.Y = Y_orig.copy()
             self.oneclass = False
         # Rewrite every row as a -1/+1 indicator of its first largest
         # entry. Each entry is compared before it is overwritten.
         for i in range(self.Y.shape[0]):
             largestind = 0
             largestval = self.Y[i, 0]
             for j in range(self.Y.shape[1]):
                 if self.Y[i, j] > largestval:
                     largestind = j
                     largestval = self.Y[i, j]
                 self.Y[i, j] = -1.
             self.Y[i, largestind] = 1.
     else:
         # Apply the default BEFORE the value is handed on; previously
         # the stale (possibly None) count reached RandomLabelSource.
         # NOTE(review): self.oneclass is left as computed above, i.e.
         # False when number_of_clusters is None -- confirm intent.
         if self.labelcount is None:
             self.labelcount = 2
         size = self.svecs.shape[0]
         self.Y = RandomLabelSource(size, self.labelcount).readLabels()
     self.size = self.Y.shape[0]
     self.labelcount = self.Y.shape[1]
     self.classvec = - mat(ones((self.size, 1), dtype = int32))
     self.classcounts = mat(zeros((self.labelcount, 1), dtype = int32))
     # Record, per row, the index of its first largest label and count
     # how many rows fall into each class.
     for i in range(self.size):
         clazzind = 0
         largestlabel = self.Y[i, 0]
         for j in range(self.labelcount):
             if self.Y[i, j] > largestlabel:
                 largestlabel = self.Y[i, j]
                 clazzind = j
         self.classvec[i] = clazzind
         self.classcounts[clazzind] = self.classcounts[clazzind] + 1

     # Transposed rows of svecs, one list entry per training example.
     self.svecs_list = [self.svecs[i].T for i in range(self.size)]
     self.fixedindices = [] if fixed_indices is None else fixed_indices
     self.results = {}
示例#11
0
 def loadResources(self):
     """Read training data (and optional validation data) from the resource pool.

     Labels are taken from ``data_sources.TRAIN_LABELS`` when present;
     otherwise pairwise preferences are taken from
     ``data_sources.TRAIN_PREFERENCES``. When both validation features
     and validation labels are in the pool, an ``EarlyStopCB`` is
     installed as ``self.callbackfun``. The training features are stored
     transposed as a ``csc_matrix`` and the bias is set to 0.

     Raises
     ------
     Exception
         If the labels have more than one column, or if neither labels
         nor preferences are available.
     """
     AbstractIterativeLearner.loadResources(self)
     if data_sources.TRAIN_LABELS in self.resource_pool:
         Y = self.resource_pool[data_sources.TRAIN_LABELS]
         self.Y = array_tools.as_labelmatrix(Y)
         # Take the dimensions from the converted matrix: the raw input
         # may be a list or a 1-dimensional array without a usable
         # two-element shape.
         #Number of training examples
         self.size = self.Y.shape[0]
         if self.Y.shape[1] > 1:
             raise Exception('CGRankRLS does not currently work in multi-label mode')
         self.learn_from_labels = True
         if (data_sources.VALIDATION_FEATURES in self.resource_pool) and (data_sources.VALIDATION_LABELS in self.resource_pool):
             validation_X = self.resource_pool[data_sources.VALIDATION_FEATURES]
             validation_Y = self.resource_pool[data_sources.VALIDATION_LABELS]
             if data_sources.VALIDATION_QIDS in self.resource_pool:
                 validation_qids = self.resource_pool[data_sources.VALIDATION_QIDS]
             else:
                 validation_qids = None
             self.callbackfun = EarlyStopCB(validation_X, validation_Y, validation_qids)
     elif data_sources.TRAIN_PREFERENCES in self.resource_pool:
         self.pairs = self.resource_pool[data_sources.TRAIN_PREFERENCES]
         self.learn_from_labels = False
     else:
         raise Exception('Neither labels nor preference information found')
     X = self.resource_pool[data_sources.TRAIN_FEATURES]
     self.X = csc_matrix(X.T)
     self.bias = 0.
     if data_sources.TRAIN_QIDS in self.resource_pool:
         qids = self.resource_pool[data_sources.TRAIN_QIDS]
         self.setQids(qids)
     self.results = {}
示例#12
0
 def __init__(self, svdad, train_labels, regparam=1.0):
     """Store the SVD adapter, the training labels and ``regparam``.

     ``train_labels`` is converted with ``array_tools.as_labelmatrix``
     and its row count is kept as ``self.size``. ``svals`` and ``svecs``
     are read from ``svdad.svals`` and ``svdad.rsvecs``.
     """
     self.svdad = svdad
     label_matrix = array_tools.as_labelmatrix(train_labels)
     self.Y = label_matrix
     # Number of rows in the converted label matrix.
     self.size = label_matrix.shape[0]
     self.regparam = regparam
     self.svals, self.svecs = svdad.svals, svdad.rsvecs
     self.results = dict()
示例#13
0
 def __init__(self, X_valid, Y_valid, measure=sqerror, maxiter=10):
     """Store the validation data and reset the tracking state.

     ``X_valid`` is converted with ``array_tools.as_matrix`` and
     ``Y_valid`` with ``array_tools.as_labelmatrix``. ``measure`` and
     ``maxiter`` are stored unchanged.
     """
     # Validation set, normalized through the array_tools helpers.
     self.X_valid = array_tools.as_matrix(X_valid)
     self.Y_valid = array_tools.as_labelmatrix(Y_valid)
     # Settings supplied by the caller.
     self.measure = measure
     self.maxiter = maxiter
     # Nothing recorded yet: no best values, counters at zero.
     self.bestperf = self.bestA = None
     self.iter = self.last_update = 0
示例#14
0
 def __init__(self, svdad, train_labels, train_qids, regparam=1.0):
     """Store the SVD adapter, labels, query ids and ``regparam``.

     ``train_labels`` is converted with ``array_tools.as_labelmatrix``
     and its row count is kept as ``self.size``. ``train_qids`` is passed
     to ``self.setQids`` once the other attributes have been assigned.
     """
     self.svdad = svdad
     label_matrix = array_tools.as_labelmatrix(train_labels)
     self.Y = label_matrix
     self.size = label_matrix.shape[0]
     self.regparam = regparam
     self.svals, self.svecs = svdad.svals, svdad.rsvecs
     # Called after the attributes above are in place, as in the
     # original statement order.
     self.setQids(train_qids)
     self.results = dict()
示例#15
0
 def __init__(self, **kwargs):
     """Build the learner state from keyword arguments.

     All keyword arguments are forwarded to
     ``creators.createSVDAdapter``. ``train_labels`` is required and is
     converted with ``array_tools.as_labelmatrix``. ``regparam`` is
     optional; it is converted to ``float`` and defaults to 1.
     """
     self.svdad = creators.createSVDAdapter(**kwargs)
     self.Y = array_tools.as_labelmatrix(kwargs["train_labels"])
     # "in" replaces dict.has_key, which no longer exists in Python 3.
     if "regparam" in kwargs:
         self.regparam = float(kwargs["regparam"])
     else:
         self.regparam = 1.
     self.svals = self.svdad.svals
     self.svecs = self.svdad.rsvecs
     self.results = {}
示例#16
0
文件: rls.py 项目: max291/RLScore
 def __init__(self, **kwargs):
     """Initialize from keyword arguments.

     The keyword arguments are handed to ``creators.createSVDAdapter``
     to obtain the adapter. ``train_labels`` (required) is converted
     with ``array_tools.as_labelmatrix``. ``regparam`` (optional) is
     converted to ``float``; when absent, 1. is used.
     """
     self.svdad = creators.createSVDAdapter(**kwargs)
     self.Y = array_tools.as_labelmatrix(kwargs["train_labels"])
     # Membership test instead of dict.has_key (removed in Python 3).
     self.regparam = float(kwargs["regparam"]) if "regparam" in kwargs else 1.
     self.svals = self.svdad.svals
     self.svecs = self.svdad.rsvecs
     self.results = {}
示例#17
0
 def loadResources(self):
     """Read labels, label indices and the optional callback from the pool.

     The training labels are converted with
     ``array_tools.as_labelmatrix``. ``label_row_inds`` and
     ``label_col_inds`` are stored as int32 arrays. ``self.callbackfun``
     is the pool entry for ``data_sources.CALLBACK_FUNCTION`` when that
     key is present, otherwise ``None``.
     """
     pool = self.resource_pool
     raw_labels = pool[data_sources.TRAIN_LABELS]
     self.label_row_inds = array(pool["label_row_inds"], dtype=int32)
     self.label_col_inds = array(pool["label_col_inds"], dtype=int32)
     self.Y = array_tools.as_labelmatrix(raw_labels)
     self.trained = False
     # NOTE(review): has_key exists only on Python 2 dicts; switch to
     # "in" once resource_pool is confirmed to support it.
     callback_key = data_sources.CALLBACK_FUNCTION
     self.callbackfun = pool[callback_key] if pool.has_key(callback_key) else None
示例#18
0
def sqerror(Y, P):
    """Mean squared error, a measure for regression problems.

    Sums (Y[i]-P[i])**2 over all indices and normalizes by the number
    of instances.

    When 2-dimensional arrays are given, the error is computed column by
    column and the column errors are averaged.

    Parameters
    ----------
    Y: {array-like}, shape = [n_samples] or [n_samples, n_tasks]
        Correct utility values, any real numbers.
    P: {array-like}, shape = [n_samples] or [n_samples, n_tasks]
        Predicted utility values, any real numbers.

    Returns
    -------
    error: float
    """
    correct = array_tools.as_labelmatrix(Y)
    predicted = array_tools.as_labelmatrix(P)
    column_errors = sqerror_multitask(correct, predicted)
    return np.mean(column_errors)
示例#19
0
def sqerror(Y, P):
    """Compute the mean squared error for regression problems.

    The error is the sum of (Y[i]-P[i])**2 over all indices, divided by
    the number of instances.

    For 2-dimensional input each column is scored separately and the
    column errors are then averaged.

    Parameters
    ----------
    Y: {array-like}, shape = [n_samples] or [n_samples, n_tasks]
        Correct utility values, any real numbers.
    P: {array-like}, shape = [n_samples] or [n_samples, n_tasks]
        Predicted utility values, any real numbers.

    Returns
    -------
    error: float
    """
    y_matrix = array_tools.as_labelmatrix(Y)
    p_matrix = array_tools.as_labelmatrix(P)
    per_column = sqerror_multitask(y_matrix, p_matrix)
    return np.mean(per_column)
示例#20
0
def sqmprank(Y, P):
    """Squared magnitude preserving ranking error, a ranking measure.

    Sums (Y[i]-Y[j]-P[i]+P[j])**2 over all index pairs and normalizes
    by the number of pairs. With query-structured data one would
    normally compute the error for each query separately and average.

    When 2-dimensional arrays are given, the error is computed column by
    column and the column errors are averaged.

    Parameters
    ----------
    Y: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct utility values, any real numbers.
    P: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted utility values, any real numbers.

    Returns
    -------
    error: float
    """
    correct = array_tools.as_labelmatrix(Y)
    predicted = array_tools.as_labelmatrix(P)
    column_errors = sqmprank_multitask(correct, predicted)
    return np.mean(column_errors)
示例#21
0
def disagreement(Y, P):
    """Disagreement error (pairwise ranking error) for ranking problems.

    Based on the pairwise disagreements between the correct and the
    predicted ranking. This is an O(n^2)-time implementation and can be
    slow on large problems (a loglinear-time implementation using search
    trees would be possible). For query-structured data the
    disagreement would typically be computed per query and averaged.

    For 2-dimensional input each column is scored separately and the
    column results are averaged, leaving out columns whose result is
    NaN.

    Parameters
    ----------
    Y: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct utility values, any real numbers.
    P: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted utility values, any real numbers.

    Returns
    -------
    disagreement: float
        number between 0 and 1

    Raises
    ------
    UndefinedPerformance
        If every column result is NaN.
    """
    y_matrix = array_tools.as_labelmatrix(Y)
    p_matrix = array_tools.as_labelmatrix(P)
    per_column = np.array(disagreement_multitask(y_matrix, p_matrix))
    usable = per_column[~np.isnan(per_column)]
    if not len(usable):
        message = "No pairs, all the instances have the same label"
        raise UndefinedPerformance(message)
    return np.mean(usable)
示例#22
0
def sqmprank(Y, P):
    """Compute the squared magnitude preserving ranking error.

    A ranking measure: the sum of (Y[i]-Y[j]-P[i]+P[j])**2 over all
    index pairs, normalized by the number of pairs. For
    query-structured data the error would typically be computed per
    query and averaged.

    For 2-dimensional input each column is scored separately and the
    column errors are then averaged.

    Parameters
    ----------
    Y: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct utility values, any real numbers.
    P: {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted utility values, any real numbers.

    Returns
    -------
    error: float
    """
    y_matrix = array_tools.as_labelmatrix(Y)
    p_matrix = array_tools.as_labelmatrix(P)
    per_column = sqmprank_multitask(y_matrix, p_matrix)
    return np.mean(per_column)
示例#23
0
 def __init__(self, **kwargs):
     """Initialize from keyword arguments.

     The keyword dict is kept as ``self.resource_pool``. Required keys
     are ``TRAIN_LABELS`` (converted with
     ``array_tools.as_labelmatrix``), ``"label_row_inds"`` and
     ``"label_col_inds"`` (both stored as int32 arrays). Optional keys
     are ``"regparam"`` (default 0.) and ``CALLBACK_FUNCTION`` (default
     ``None``).
     """
     self.resource_pool = kwargs
     Y = kwargs[TRAIN_LABELS]
     self.label_row_inds = np.array(kwargs["label_row_inds"], dtype = np.int32)
     self.label_col_inds = np.array(kwargs["label_col_inds"], dtype = np.int32)
     self.Y = array_tools.as_labelmatrix(Y)
     self.trained = False
     # dict.get replaces the has_key/else pairs; has_key does not exist
     # on Python 3 dicts.
     self.regparam = kwargs.get("regparam", 0.)
     self.callbackfun = kwargs.get(CALLBACK_FUNCTION)
示例#24
0
 def __init__(self, **kwargs):
     """Initialize from keyword arguments, in kernel or feature mode.

     ``train_labels`` is converted with ``array_tools.as_labelmatrix``.
     If ``kmatrix1`` is supplied, ``kmatrix1``/``kmatrix2`` are stored as
     matrices ``K1``/``K2`` and ``kernelmode`` is True. Otherwise
     ``xmatrix1``/``xmatrix2`` are stored as ``X1``/``X2`` and
     ``kernelmode`` is False. ``regparam`` is required.
     """
     self.Y = array_tools.as_labelmatrix(kwargs["train_labels"])
     # "in" replaces dict.has_key, which no longer exists in Python 3.
     if 'kmatrix1' in kwargs:
         K1 = mat(kwargs['kmatrix1'])
         K2 = mat(kwargs['kmatrix2'])
         self.K1, self.K2 = K1, K2
         self.kernelmode = True
     else:
         X1 = mat(kwargs['xmatrix1'])
         X2 = mat(kwargs['xmatrix2'])
         self.X1, self.X2 = X1, X2
         self.kernelmode = False
     self.regparam = kwargs["regparam"]
     self.trained = False
示例#25
0
 def __init__(self, train_features, train_labels, validation_features=None, validation_labels=None, regparam=1.0, bias=1.0):
     """Store the training data and optionally install early stopping.

     Parameters
     ----------
     train_features: matrix-like
         Stored transposed as a ``csc_matrix`` in ``self.X``; a CSR copy
         is kept in ``self.X_csr``.
     train_labels: {array-like}
         Converted with ``array_tools.as_labelmatrix``.
     validation_features, validation_labels: optional
         When both are given, an ``EarlyStopCB`` built from them becomes
         ``self.callbackfun``; otherwise ``self.callbackfun`` is None.
     regparam: float
         Stored as ``self.regparam``.
     bias: float
         When non-zero, a constant row with value ``sqrt(bias)`` is
         appended to ``self.X``.
     """
     X = train_features
     self.Y = array_tools.as_labelmatrix(train_labels)
     self.X = csc_matrix(X.T)
     self.bias = bias
     self.regparam = regparam
     if self.bias != 0.:
         # One extra row holding sqrt(bias) for every training example.
         bias_slice = sqrt(self.bias)*np.mat(ones((1,self.X.shape[1]),dtype=np.float64))
         self.X = sparse.vstack([self.X,bias_slice]).tocsc()
     else:
         self.bias = 0.
     self.X_csr = self.X.tocsr()
     # Identity test on purpose: "!= None" is evaluated elementwise on
     # numpy arrays / sparse matrices and cannot be used in a condition.
     if validation_features is not None and validation_labels is not None:
         self.callbackfun = EarlyStopCB(validation_features, validation_labels)
     else:
         self.callbackfun = None
     self.results = {}
示例#26
0
 def __init__(self, **kwargs):
     """Build the learner state from keyword arguments.

     ``kwargs`` itself is kept as ``self.resource_pool``. The entry for
     ``TRAIN_LABELS`` is converted with ``array_tools.as_labelmatrix``;
     ``"label_row_inds"`` and ``"label_col_inds"`` are stored as int32
     arrays. ``"regparam"`` defaults to 0. and ``CALLBACK_FUNCTION`` to
     ``None`` when not supplied.
     """
     self.resource_pool = kwargs
     Y = kwargs[TRAIN_LABELS]
     self.label_row_inds = np.array(kwargs["label_row_inds"],
                                    dtype=np.int32)
     self.label_col_inds = np.array(kwargs["label_col_inds"],
                                    dtype=np.int32)
     self.Y = array_tools.as_labelmatrix(Y)
     self.trained = False
     # dict.get covers the "present or default" cases without
     # dict.has_key, which was removed in Python 3.
     self.regparam = kwargs.get("regparam", 0.)
     self.callbackfun = kwargs.get(CALLBACK_FUNCTION)
示例#27
0
    def __init__(self, **kwargs):
        """Initialize the learner from keyword arguments.

        Required keys: ``regparam`` (converted to float),
        ``train_features``, ``train_labels`` and ``subsetsize``
        (converted to int). Optional keys: ``bias`` (float, default
        0.0), ``measure`` (default None) and ``use_default_callback``.

        Sparse features are densified; the features are stored
        transposed in ``self.X``. Labels are converted with
        ``array_tools.as_labelmatrix``.

        Raises
        ------
        Exception
            If ``subsetsize`` is missing, or if it exceeds the number of
            columns of ``train_features``.
        """
        super(GreedyRLS, self).__init__(**kwargs)
        self.regparam = float(kwargs["regparam"])
        X = kwargs["train_features"]
        if isinstance(X, sp.base.spmatrix):
            self.X = X.todense()
        else:
            self.X = X
        self.X = self.X.T
        self.Y = array_tools.as_labelmatrix(kwargs["train_labels"])
        # Number of training examples
        self.size = self.Y.shape[0]
        # Membership tests / dict.get replace dict.has_key, which no
        # longer exists in Python 3.
        self.bias = float(kwargs["bias"]) if "bias" in kwargs else 0.0
        self.measure = kwargs.get("measure")

        # Feature count is read from the untransposed input.
        fsize = X.shape[1]
        if "subsetsize" not in kwargs:
            raise Exception("Parameter 'subsetsize' must be given.")
        self.desiredfcount = int(kwargs["subsetsize"])
        if fsize < self.desiredfcount:
            raise Exception(
                "The overall number of features %s is smaller than the "
                "desired number %s of features to be selected."
                % (fsize, self.desiredfcount)
            )
        self.results = {}
        if "use_default_callback" in kwargs and bool(kwargs["use_default_callback"]):
            self.callbackfun = DefaultCallback(**kwargs)
示例#28
0
 def __init__(self, **kwargs):
     """Initialize from keyword arguments.

     Training targets come either from ``train_labels`` (converted with
     ``array_tools.as_labelmatrix``; a single column only) or from
     ``train_preferences``. When both ``validation_features`` and
     ``validation_labels`` are given together with labels, an
     ``EarlyStopCB`` is installed as ``self.callbackfun``.
     ``train_features`` is stored transposed as a ``csc_matrix``;
     ``regparam`` defaults to 0. and the bias is set to 0. ``train_qids``
     is passed to ``self.setQids`` when present, otherwise
     ``self.qidmap`` is None.

     Raises
     ------
     Exception
         If the labels have more than one column, or if neither labels
         nor preferences are supplied.
     """
     super(CGRankRLS, self).__init__(**kwargs)
     # Membership test instead of dict.has_key (removed in Python 3).
     if "regparam" in kwargs:
         self.regparam = float(kwargs["regparam"])
     else:
         self.regparam = 0.
     if 'train_labels' in kwargs:
         self.Y = array_tools.as_labelmatrix(kwargs['train_labels'])
         # Read the row count from the converted matrix: the raw labels
         # may be a plain list without a shape attribute.
         #Number of training examples
         self.size = self.Y.shape[0]
         if self.Y.shape[1] > 1:
             raise Exception(
                 'CGRankRLS does not currently work in multi-label mode')
         self.learn_from_labels = True
         if ('validation_features' in kwargs) and ('validation_labels'
                                                   in kwargs):
             validation_X = kwargs['validation_features']
             validation_Y = kwargs['validation_labels']
             validation_qids = kwargs.get('validation_qids')
             self.callbackfun = EarlyStopCB(validation_X, validation_Y,
                                            validation_qids)
     elif 'train_preferences' in kwargs:
         self.pairs = kwargs['train_preferences']
         self.learn_from_labels = False
     else:
         raise Exception('Neither labels nor preference information found')
     X = kwargs['train_features']
     self.X = csc_matrix(X.T)
     self.bias = 0.
     if 'train_qids' in kwargs:
         qids = kwargs['train_qids']
         self.setQids(qids)
     else:
         self.qidmap = None
     self.results = {}
示例#29
0
    def __init__(self, **kwargs):
        """Build the learner state from keyword arguments.

        Required keys: ``regparam`` (converted to float),
        ``train_features``, ``train_labels`` and ``subsetsize``
        (converted to int). Optional keys: ``bias`` (float, default 0.),
        ``measure`` (default None) and ``use_default_callback``.

        A sparse feature matrix is densified, and the features are kept
        transposed in ``self.X``. The labels are converted with
        ``array_tools.as_labelmatrix``.

        Raises
        ------
        Exception
            If ``subsetsize`` is not given, or if it is larger than the
            number of columns of ``train_features``.
        """
        super(GreedyRLS, self).__init__(**kwargs)
        self.regparam = float(kwargs['regparam'])
        X = kwargs['train_features']
        if isinstance(X, sp.base.spmatrix):
            self.X = X.todense()
        else:
            self.X = X
        self.X = self.X.T
        self.Y = array_tools.as_labelmatrix(kwargs['train_labels'])
        #Number of training examples
        self.size = self.Y.shape[0]
        # "in" / dict.get replace dict.has_key (removed in Python 3).
        self.bias = float(kwargs['bias']) if 'bias' in kwargs else 0.
        self.measure = kwargs.get('measure')

        # Feature count is taken from the untransposed input matrix.
        fsize = X.shape[1]
        if 'subsetsize' not in kwargs:
            raise Exception("Parameter 'subsetsize' must be given.")
        self.desiredfcount = int(kwargs['subsetsize'])
        if fsize < self.desiredfcount:
            raise Exception('The overall number of features %s is smaller '
                            'than the desired number %s of features to be '
                            'selected.' % (fsize, self.desiredfcount))
        self.results = {}
        if 'use_default_callback' in kwargs and bool(
                kwargs['use_default_callback']):
            self.callbackfun = DefaultCallback(**kwargs)
示例#30
0
 def loadResources(self):
     """Load training (and optional validation) data from the resource pool.

     Uses the labels under ``data_sources.TRAIN_LABELS`` when available,
     otherwise the preferences under ``data_sources.TRAIN_PREFERENCES``.
     With labels, and when both validation features and validation
     labels are present, ``self.callbackfun`` becomes an ``EarlyStopCB``.
     The training features are stored transposed as a ``csc_matrix`` and
     the bias is set to 0.

     Raises
     ------
     Exception
         If the labels have more than one column, or if the pool holds
         neither labels nor preferences.
     """
     AbstractIterativeLearner.loadResources(self)
     if data_sources.TRAIN_LABELS in self.resource_pool:
         Y = self.resource_pool[data_sources.TRAIN_LABELS]
         self.Y = array_tools.as_labelmatrix(Y)
         # Dimensions come from the converted matrix; the raw pool entry
         # may be a list or a 1-dimensional array.
         #Number of training examples
         self.size = self.Y.shape[0]
         if self.Y.shape[1] > 1:
             raise Exception(
                 'CGRankRLS does not currently work in multi-label mode')
         self.learn_from_labels = True
         if (data_sources.VALIDATION_FEATURES
                 in self.resource_pool) and (data_sources.VALIDATION_LABELS
                                             in self.resource_pool):
             validation_X = self.resource_pool[
                 data_sources.VALIDATION_FEATURES]
             validation_Y = self.resource_pool[
                 data_sources.VALIDATION_LABELS]
             if data_sources.VALIDATION_QIDS in self.resource_pool:
                 validation_qids = self.resource_pool[
                     data_sources.VALIDATION_QIDS]
             else:
                 validation_qids = None
             self.callbackfun = EarlyStopCB(validation_X, validation_Y,
                                            validation_qids)
     elif data_sources.TRAIN_PREFERENCES in self.resource_pool:
         self.pairs = self.resource_pool[data_sources.TRAIN_PREFERENCES]
         self.learn_from_labels = False
     else:
         raise Exception('Neither labels nor preference information found')
     X = self.resource_pool[data_sources.TRAIN_FEATURES]
     self.X = csc_matrix(X.T)
     self.bias = 0.
     if data_sources.TRAIN_QIDS in self.resource_pool:
         qids = self.resource_pool[data_sources.TRAIN_QIDS]
         self.setQids(qids)
     self.results = {}
示例#31
0
 def __init__(self, **kwargs):
     """Build the learner state from keyword arguments.

     Targets are read from ``train_labels`` (converted with
     ``array_tools.as_labelmatrix``; only one column is accepted) or,
     failing that, from ``train_preferences``. With labels, and when
     both ``validation_features`` and ``validation_labels`` are given,
     ``self.callbackfun`` becomes an ``EarlyStopCB``. ``train_features``
     is stored transposed as a ``csc_matrix``. ``regparam`` defaults to
     0.; the bias is set to 0. ``train_qids`` goes to ``self.setQids``
     when supplied, otherwise ``self.qidmap`` is None.

     Raises
     ------
     Exception
         If the labels have more than one column, or if neither labels
         nor preferences are given.
     """
     super(CGRankRLS, self).__init__(**kwargs)
     # "in" replaces dict.has_key, which no longer exists in Python 3.
     self.regparam = float(kwargs["regparam"]) if "regparam" in kwargs else 0.
     if 'train_labels' in kwargs:
         self.Y = array_tools.as_labelmatrix(kwargs['train_labels'])
         # Use the converted matrix for the row count; the raw labels
         # may be a plain list with no shape attribute.
         #Number of training examples
         self.size = self.Y.shape[0]
         if self.Y.shape[1] > 1:
             raise Exception('CGRankRLS does not currently work in multi-label mode')
         self.learn_from_labels = True
         if ('validation_features' in kwargs) and ('validation_labels' in kwargs):
             validation_X = kwargs['validation_features']
             validation_Y = kwargs['validation_labels']
             validation_qids = kwargs.get('validation_qids')
             self.callbackfun = EarlyStopCB(validation_X, validation_Y, validation_qids)
     elif 'train_preferences' in kwargs:
         self.pairs = kwargs['train_preferences']
         self.learn_from_labels = False
     else:
         raise Exception('Neither labels nor preference information found')
     X = kwargs['train_features']
     self.X = csc_matrix(X.T)
     self.bias = 0.
     if 'train_qids' in kwargs:
         qids = kwargs['train_qids']
         self.setQids(qids)
     else:
         self.qidmap = None
     self.results = {}
示例#32
0
 def loadResources(self):
     """Load the training labels from the resource pool.

     The entry under ``data_sources.TRAIN_LABELS`` is converted with
     ``array_tools.as_labelmatrix`` and stored as ``self.Y``;
     ``self.trained`` is reset to False.
     """
     raw_labels = self.resource_pool[data_sources.TRAIN_LABELS]
     self.Y = array_tools.as_labelmatrix(raw_labels)
     self.trained = False
示例#33
0
 def __init__(self, **kwargs):
     """Initialize the base class and store the training labels.

     ``kwargs['train_labels']`` is converted with
     ``array_tools.as_labelmatrix``; the two dimensions of the result
     are kept as ``self.size`` and ``self.ysize``.
     """
     super(AbstractSupervisedLearner, self).__init__(**kwargs)
     label_matrix = array_tools.as_labelmatrix(kwargs['train_labels'])
     self.Y = label_matrix
     # Row count first, column count second.
     self.size = label_matrix.shape[0]
     self.ysize = label_matrix.shape[1]
示例#34
0
 def setLabels(self, Y):
     """Replace the stored labels and refresh the cached dimensions.

     ``Y`` is converted with ``array_tools.as_labelmatrix``; ``self.size``
     and ``self.ysize`` are set to the first and second entry of the
     converted matrix's shape.
     """
     label_matrix = array_tools.as_labelmatrix(Y)
     self.Y = label_matrix
     self.size = label_matrix.shape[0]
     self.ysize = label_matrix.shape[1]
示例#35
0
    def __init__(self,
                 svdad,
                 number_of_clusters=2,
                 regparam=1.0,
                 train_labels=None,
                 fixed_indices=None,
                 callback_obj=None):
        """Set up the label matrix and the per-class bookkeeping.

        Parameters
        ----------
        svdad: object
            Adapter exposing ``svals`` and ``rsvecs``.
        number_of_clusters: int or None
            Number of label columns to generate when ``train_labels`` is
            not given; ``None`` falls back to 2.
        regparam: float
            Stored as ``self.regparam``.
        train_labels: {array-like} or None
            Initial labels. When given, they are converted with
            ``array_tools.as_labelmatrix``; a single column is expanded
            to two columns (the column and its negation). Each row is
            then rewritten so that its first largest entry is 1 and all
            others are -1. When omitted, labels come from
            ``RandomLabelSource``.
        fixed_indices: sequence or None
            Stored as ``self.fixedindices`` (empty list when ``None``).
        callback_obj: object or None
            Stored as ``self.callbackfun``.
        """
        self.svdad = svdad
        self.regparam = regparam
        self.svals = svdad.svals
        self.svecs = svdad.rsvecs
        self.constraint = 0
        self.labelcount = number_of_clusters
        self.oneclass = bool(self.labelcount == 2)
        self.callbackfun = callback_obj
        # Identity test on purpose: "!= None" is evaluated elementwise
        # on numpy arrays and cannot be used as an if-condition.
        if train_labels is not None:
            Y_orig = array_tools.as_labelmatrix(train_labels)
            if Y_orig.shape[1] == 1:
                self.Y = mat(zeros((Y_orig.shape[0], 2)))
                self.Y[:, 0] = Y_orig
                self.Y[:, 1] = -Y_orig
                self.oneclass = True
            else:
                self.Y = Y_orig.copy()
                self.oneclass = False
            # Rewrite every row as a -1/+1 indicator of its first
            # largest entry. Each entry is compared before it is
            # overwritten.
            for i in range(self.Y.shape[0]):
                largestind = 0
                largestval = self.Y[i, 0]
                for j in range(self.Y.shape[1]):
                    if self.Y[i, j] > largestval:
                        largestind = j
                        largestval = self.Y[i, j]
                    self.Y[i, j] = -1.
                self.Y[i, largestind] = 1.
        else:
            # Apply the default BEFORE the value is handed on;
            # previously the stale (possibly None) count reached
            # RandomLabelSource.
            # NOTE(review): self.oneclass is left as computed above,
            # i.e. False when number_of_clusters is None -- confirm.
            if self.labelcount is None:
                self.labelcount = 2
            size = self.svecs.shape[0]
            self.Y = RandomLabelSource(size, self.labelcount).readLabels()
        self.size = self.Y.shape[0]
        self.labelcount = self.Y.shape[1]
        self.classvec = -mat(ones((self.size, 1), dtype=int32))
        self.classcounts = mat(zeros((self.labelcount, 1), dtype=int32))
        # Record, per row, the index of its first largest label and
        # count how many rows fall into each class.
        for i in range(self.size):
            clazzind = 0
            largestlabel = self.Y[i, 0]
            for j in range(self.labelcount):
                if self.Y[i, j] > largestlabel:
                    largestlabel = self.Y[i, j]
                    clazzind = j
            self.classvec[i] = clazzind
            self.classcounts[clazzind] = self.classcounts[clazzind] + 1

        # Transposed rows of svecs, one list entry per training example.
        self.svecs_list = [self.svecs[i].T for i in range(self.size)]
        self.fixedindices = [] if fixed_indices is None else fixed_indices
        self.results = {}
示例#36
0
def spearman(Y, P):
    """Average the per-task values returned by ``spearman_multitask``.

    Both arguments are converted with ``array_tools.as_labelmatrix``
    before being scored.

    Parameters
    ----------
    Y: {array-like}
        Correct values.
    P: {array-like}
        Predicted values.

    Returns
    -------
    spearman: float
        ``np.mean`` of the values from ``spearman_multitask``.
    """
    correct = array_tools.as_labelmatrix(Y)
    predicted = array_tools.as_labelmatrix(P)
    task_values = spearman_multitask(correct, predicted)
    return np.mean(task_values)
示例#37
0
 def setLabels(self, Y):
     """Store new labels together with their dimensions.

     The argument is converted with ``array_tools.as_labelmatrix``. The
     first shape entry of the result becomes ``self.size`` and the
     second becomes ``self.ysize``.
     """
     converted = array_tools.as_labelmatrix(Y)
     self.Y = converted
     self.size = converted.shape[0]
     self.ysize = converted.shape[1]
示例#38
0
 def __init__(self, **kwargs):
     """Run the parent initializer, then record the training labels.

     The ``'train_labels'`` keyword is required. It is converted with
     ``array_tools.as_labelmatrix`` and stored as ``self.Y``; the shape
     of the converted matrix provides ``self.size`` and ``self.ysize``.
     """
     super(AbstractSupervisedLearner, self).__init__(**kwargs)
     converted = array_tools.as_labelmatrix(kwargs['train_labels'])
     self.Y = converted
     # First shape entry is the example count, second the label count.
     self.size = converted.shape[0]
     self.ysize = converted.shape[1]