def totalLossRDD(dataRDD, beta, lam=0.0):
    """Compute the regularized total logistic loss over an RDD dataset:

        L(β) = Σ_{(x,y) in data} l(β; x, y) + λ ||β||_2^2

    Inputs:
        - dataRDD: an RDD of pairs (SparseVector(x), y), where y is a binary label
        - beta: a sparse vector β
        - lam: the regularization parameter λ

    Returns the scalar value L(β).
    """
    # Compute λ ||β||_2^2
    reg = lam * beta.dot(beta)
    # Sum the per-example logistic losses across the RDD.
    # NOTE: tuple-parameter lambdas (lambda (x, y): ...) were removed in
    # Python 3 (PEP 3113), so the pair is unpacked by indexing instead.
    tot = dataRDD.map(lambda xy: logisticLoss(beta, xy[0], xy[1]))\
                 .reduce(add)
    return tot + reg
def totalLossRDD(dataRDD, beta, lam=0.0):
    """Compute the regularized total logistic loss of a dataset against a
    SparseVector beta:

        L(β) = Σ_{(x,y) in data} l(β; x, y) + λ ||β||_2^2

    Inputs:
        - dataRDD: an RDD of pairs (SparseVector(x), y)
        - beta: a sparse vector β
        - lam: the regularization parameter λ

    Returns a scalar.
    """
    # Tuple-parameter lambdas were removed in Python 3 (PEP 3113);
    # unpack the (x, y) pair by indexing instead.
    return dataRDD.map(lambda xy: logisticLoss(beta, xy[0], xy[1])).reduce(
        add) + lam * beta.dot(beta)
def totalLossRDD(dataRDD, beta, lam=0.0):
    """Given a sparse vector beta and a dataset, compute the regularized
    total logistic loss:

        L(β) = Σ_{(x,y) in data} l(β; x, y) + λ ||β||_2^2

    Inputs:
        - dataRDD: an RDD of pairs (x, y), where x is a sparse vector and
          y is a binary value
        - beta: a sparse vector β
        - lam: the regularization parameter λ

    Returns the loss value (a scalar).
    """
    # Tuple-parameter lambdas were removed in Python 3 (PEP 3113);
    # unpack the (x, y) pair by indexing instead.
    return dataRDD.map(lambda xy: logisticLoss(beta, xy[0], xy[1]))\
                  .reduce(lambda a, b: a + b) + lam * beta.dot(beta)
def totalLossRDD(dataRDD, beta, lam=0.0):
    """Given a sparse vector beta and a dataset, compute the regularized
    total logistic loss:

        L(β) = Σ_{(x,y) in data} l(β; x, y) + λ ||β||_2^2

    Inputs:
        - dataRDD: an RDD of pairs (x, y), where x is a sparse vector and
          y is a binary value
        - beta: a sparse vector β
        - lam: the regularization parameter λ
    """
    # λ ||β||_2^2 regularization term
    regularization = lam * beta.dot(beta)
    # Sum per-example logistic losses across the RDD.
    data_loss = dataRDD.map(
        lambda pair: logisticLoss(beta, pair[0], pair[1])).sum()
    return data_loss + regularization
def totalLossRDD(dataRDD, beta, lam=0.0):
    """Given a sparse vector beta and a dataset held in an RDD, compute the
    regularized total logistic loss:

        L(β) = Σ_{(x,y) in data} l(β; x, y) + λ ||β||_2^2

    Inputs:
        - dataRDD: an RDD of pairs (x, y), where x is a sparse vector and
          y is a binary value
        - beta: a sparse vector β
        - lam: the regularization parameter λ

    Returns a float representing the total loss.
    """
    # Tuple-parameter lambdas were removed in Python 3 (PEP 3113);
    # unpack the (x, y) pair by indexing instead.
    loss = dataRDD.map(lambda xy: logisticLoss(beta, xy[0], xy[1]))\
                  .reduce(add)
    return loss + lam * beta.dot(beta)