Пример #1
0
def totalLossRDD(dataRDD, beta, lam=0.0):
    """  Given a vector β and a dataset represented by dataRDD, compute
         the regularized total logistic loss :

            L(β) = Σ_{(x,y) in data}  l(β;x,y)  + λ ||β ||_2^2

         Inputs are:
            - dataRDD: an RDD whose records are (x, y) pairs; each record is handed
              to logisticLoss together with beta
            - beta: the vector β; it must provide a dot() method
            - lam: the regularization parameter λ

         The output is the scalar value L(β). An empty dataRDD contributes a data
         term of 0, so only the regularization term is returned.
    """

    # Compute  λ ||β ||_2^2

    reg = lam * beta.dot(beta)

    # Sum the per-record losses.
    # Records are indexed instead of unpacked in the lambda signature because
    # tuple parameters are Python 2-only syntax (removed by PEP 3113).
    # sum() is used rather than reduce(add) so an empty RDD yields 0 instead
    # of raising.

    tot = dataRDD.map(lambda pair: logisticLoss(beta, pair[0], pair[1]))\
                 .sum()

    return tot + reg
def totalLossRDD(dataRDD, beta, lam=0.0):
    """ Computes the regularized total logistic loss of the dataset and a vector beta:

            L(β) = Σ_{(x,y) in data}  l(β;x,y)  + λ ||β ||_2^2

    The input is:
            - dataRDD: an RDD containing pairs of the form (x, y); each pair is
              passed to logisticLoss together with beta
            - beta: the vector β; it must provide a dot() method
            - lam: the regularization parameter λ
    The return value is a scalar. For an empty dataRDD the data term is 0, so
    only the regularization term is returned.
    """
    # Index the record instead of using a tuple parameter in the lambda: that
    # form is Python 2-only syntax. sum() replaces reduce(add) so an empty RDD
    # gives 0 rather than an error.
    return dataRDD.map(
        lambda pair: logisticLoss(beta, pair[0], pair[1])
    ).sum() + lam * beta.dot(beta)
Пример #3
0
def totalLossRDD(dataRDD, beta, lam=0.0):
    """  Given a vector beta and a dataset  compute the regularized total logistic loss :

               L(β) = Σ_{(x,y) in data}  l(β;x,y)  + λ ||β ||_2^2

         The inputs are:
            - dataRDD: a RDD containing pairs of the form (x,y); each pair is passed
              to logisticLoss together with beta
            - beta: the vector β; it must provide a dot() method
            - lam: the regularization parameter λ
         The return value is the loss value. An empty dataRDD yields a data term
         of 0, leaving only the regularization term.
    """
    # Tuple parameters in a lambda signature are Python 2-only syntax, so the
    # record is indexed explicitly. sum() adds the per-record losses and, unlike
    # reduce(), returns 0 for an empty RDD instead of raising.
    data_loss = dataRDD.map(lambda pair: logisticLoss(beta, pair[0], pair[1])).sum()
    return data_loss + lam * beta.dot(beta)
Пример #4
0
def totalLossRDD(dataRDD, beta, lam=0.0):
    """  Compute the regularized total logistic loss of a dataset for a vector beta :

               L(β) = Σ_{(x,y) in data}  l(β;x,y)  + λ ||β ||_2^2

         Inputs are:
            - dataRDD: a rdd whose records are indexable pairs; element 0 and
              element 1 of each record are passed to logisticLoss after beta
            - beta: the vector β; it must provide a dot() method
            - lam: the regularization parameter λ

         Returns the summed per-record loss plus lam * beta.dot(beta).
    """
    def record_loss(record):
        # Loss contributed by a single record of the dataset.
        return logisticLoss(beta, record[0], record[1])

    # The data term is evaluated before the regularization term.
    data_term = dataRDD.map(record_loss).sum()
    penalty = lam * beta.dot(beta)
    return data_term + penalty
Пример #5
0
def totalLossRDD(dataRDD, beta, lam=0.0):
    """  Given a vector beta and a dataset in the form of RDD compute the regularized total logistic loss :

               L(β) = Σ_{(x,y) in data}  l(β;x,y)  + λ ||β ||_2^2

         Inputs are:
            - dataRDD: an RDD containing pairs of the form (x,y); each pair is passed
              to logisticLoss together with beta
            - beta: the vector β; it must provide a dot() method
            - lam: the regularization parameter λ

        The return value is a number representing the total loss. For an empty
        dataRDD the data term is 0, so only the regularization term is returned.
    """
    # Records are indexed rather than unpacked in the lambda signature, since
    # tuple parameters are Python 2-only syntax. sum() is used in place of
    # reduce(add) so an empty RDD produces 0 instead of raising.
    loss = dataRDD.map(lambda pair: logisticLoss(beta, pair[0], pair[1]))\
                  .sum()
    return loss + lam * beta.dot(beta)