示例#1
0
def cvmultnet(fit,
              lambdau,
              x,
              y,
              weights,
              offset,
              foldid,
              ptype,
              grouped,
              keep=False):
    """Compute the cross-validation error curve for multinomial fits.

    For every fold the held-out rows of ``x`` are predicted with that fold's
    model (``glmnetPredict`` with type ``'response'``), the requested loss is
    evaluated per observation and per lambda, and the weighted mean and its
    standard error over observations (or over folds when ``grouped``) are
    returned.

    Parameters
    ----------
    fit : sequence of dict
        One fitted model per fold; ``fit[i]['lambdau']`` holds the lambda
        values that fold actually reached.
    lambdau : numpy.ndarray
        Full lambda path; ``lambdau.size`` is the width of the result.
    x : array
        Predictors, row-indexed with a boolean fold mask.
    y : numpy.ndarray (2-D)
        Either one column of class labels (one-hot encoded here) or an
        ``nobs x nc`` matrix with one column per class.
    weights : numpy.ndarray
        Observation weights; multiplied by the row sums of ``y``.
        NOTE(review): assumed to broadcast against an ``(nobs, 1)`` column.
    offset : array
        Empty when no offset is used, otherwise row-indexed like ``x``.
    foldid : numpy.ndarray
        Zero-based fold index of every observation.
    ptype : str
        ``'default'``, ``'deviance'``, ``'mse'``, ``'mae'`` or ``'class'``;
        anything else falls back to ``'deviance'`` with a printed warning.
    grouped : bool
        When true the per-observation losses are aggregated per fold by
        ``cvcompute`` before averaging.
    keep : bool, optional
        When true the prediction array is returned as ``'fit_preval'``.

    Returns
    -------
    dict
        ``'cvm'`` (mean error), ``'cvsd'`` (standard error), ``'name'``
        (label of the loss) and optionally ``'fit_preval'``.
    """
    typenames = {
        'deviance': 'Multinomial Deviance',
        'mse': 'Mean-Squared Error',
        'mae': 'Mean Absolute Error',
        'class': 'Misclassification Error'
    }
    if ptype == 'default':
        ptype = 'deviance'

    ptypeList = ['mse', 'mae', 'deviance', 'class']
    if ptype not in ptypeList:
        print('Warning: only ', ptypeList, 'available for multinomial models; '
              'deviance'
              ' used')
        ptype = 'deviance'

    prob_min = 1.0e-5
    prob_max = 1 - prob_min

    if y.shape[1] == 1:
        # Flatten first: on a 2-D input NumPy >= 2 returns an inverse with the
        # input's shape, which would give the one-hot matrix an extra axis.
        classes, sy = numpy.unique(numpy.ravel(y), return_inverse=True)
        nc = len(classes)
        y = numpy.eye(nc, nc)[sy, :]
    else:
        nc = y.shape[1]

    nobs = y.shape[0]
    is_offset = len(offset) > 0
    # NaN marks (observation, lambda) cells that a fold never predicted.
    predmat = numpy.full([nobs, nc, lambdau.size], numpy.nan)
    nfolds = numpy.amax(foldid) + 1
    nlams = []
    for i in range(nfolds):
        which = foldid == i
        fitobj = fit[i].copy()
        if is_offset:
            off_sub = offset[which, ]
        else:
            off_sub = numpy.empty([0])
        preds = glmnetPredict(fitobj, x[which, ], numpy.empty([0]), 'response',
                              False, off_sub)
        nlami = numpy.size(fit[i]['lambdau'])
        # The lambda path is the LAST axis; the class axis is taken whole.
        predmat[which, :, 0:nlami] = preds
        nlams.append(nlami)
    # convert nlams to a numpy array
    nlams = numpy.array(nlams, dtype=int)

    ywt = numpy.sum(y, axis=1, keepdims=True)
    y = y / ywt
    weights = weights * ywt
    # Every class shares the same NaN pattern, so the first class is enough
    # to count the observations available at each lambda.
    N = nobs - numpy.sum(numpy.isnan(predmat[:, 0, :]), axis=0, keepdims=True)
    bigY = numpy.tile(y[:, :, None], [1, 1, lambdau.size])

    # Each branch yields a 2-D (nobs, nlambda) array; no squeeze, so a single
    # lambda or a single observation does not silently drop a dimension.
    if ptype == 'mse':
        cvraw = numpy.sum((bigY - predmat)**2, axis=1)
    elif ptype == 'deviance':
        predmat = numpy.clip(predmat, prob_min, prob_max)
        lp = bigY * numpy.log(predmat)
        # 0 * log(0) is defined as 0 below; silence the intermediate warnings.
        with numpy.errstate(divide='ignore', invalid='ignore'):
            ly = bigY * numpy.log(bigY)
        ly[y == 0] = 0
        cvraw = numpy.sum(2 * (ly - lp), axis=1)
    elif ptype == 'mae':
        cvraw = numpy.sum(numpy.absolute(bigY - predmat), axis=1)
    elif ptype == 'class':
        classid = numpy.full([nobs, lambdau.size], numpy.nan)
        for i in range(lambdau.size):
            classid[:, i] = glmnet_softmax(predmat[:, :, i])
        classid = classid.reshape([classid.size, 1])
        yperm = bigY.transpose((0, 2, 1))
        yperm = yperm.reshape([yperm.size, 1])
        idx = sub2ind(yperm.shape, range(len(classid)), classid.transpose())
        cvraw = numpy.reshape(1 - yperm[idx], [-1, lambdau.size])

    # Same guard as the sibling cv* routines: per-fold grouping is not
    # meaningful with fewer than three observations per fold.
    if nobs / nfolds < 3 and grouped:
        print(
            'Option grouped=false enforced in cv.glmnet, since < 3 observations per fold'
        )
        grouped = False

    if grouped:
        cvob = cvcompute(cvraw, weights, foldid, nlams)
        cvraw = cvob['cvraw']
        weights = cvob['weights']
        N = cvob['N']

    cvm = wtmean(cvraw, weights)
    sqccv = (cvraw - cvm)**2
    cvsd = numpy.sqrt(wtmean(sqccv, weights) / (N - 1))

    result = dict()
    result['cvm'] = cvm
    result['cvsd'] = cvsd
    result['name'] = typenames[ptype]

    if keep:
        result['fit_preval'] = predmat

    return result
示例#2
0
def cvlognet(fit,
             lambdau,
             x,
             y,
             weights,
             offset,
             foldid,
             ptype,
             grouped,
             keep=False):
    """Compute the cross-validation error curve for binomial (logistic) fits.

    For every fold the held-out rows of ``x`` are predicted with that fold's
    model (``glmnetPredict`` with type ``'response'``). The predictions are
    treated as the probability of the second column of ``y``. The requested
    measure is then evaluated per lambda and summarised by its weighted mean
    and standard error.

    Parameters
    ----------
    fit : sequence of dict
        One fitted model per fold; ``fit[i]['lambdau']`` holds the lambda
        values that fold actually reached.
    lambdau : numpy.ndarray
        Full lambda path; ``lambdau.size`` is the width of the result.
    x : array
        Predictors, row-indexed with a boolean fold mask.
    y : numpy.ndarray (2-D)
        Either one column of class labels (one-hot encoded here) or a
        two-column matrix, one column per class.
    weights : numpy.ndarray
        Observation weights.
        NOTE(review): the ``(weights * ywt.T).T`` step suggests a column
        vector is expected - confirm against the caller.
    offset : array
        Empty when no offset is used, otherwise row-indexed like ``x``.
    foldid : numpy.ndarray
        Zero-based fold index of every observation.
    ptype : str
        ``'default'``, ``'deviance'``, ``'mse'``, ``'mae'``, ``'auc'`` or
        ``'class'``; anything else falls back to ``'deviance'``.
    grouped : bool
        When true (and the measure is not AUC) the per-observation losses
        are aggregated per fold by ``cvcompute`` before averaging.
    keep : bool, optional
        When true the prediction matrix is returned as ``'fit_preval'``.

    Returns
    -------
    dict
        ``'cvm'``, ``'cvsd'``, ``'name'`` and optionally ``'fit_preval'``.
    """
    # Function-scope import: the scipy.* aliases of NumPy functions that this
    # routine used to call are deprecated in SciPy.
    import numpy

    typenames = {
        'deviance': 'Binomial Deviance',
        'mse': 'Mean-Squared Error',
        'mae': 'Mean Absolute Error',
        'auc': 'AUC',
        'class': 'Misclassification Error'
    }
    if ptype == 'default':
        ptype = 'deviance'

    ptypeList = ['mse', 'mae', 'deviance', 'auc', 'class']
    if ptype not in ptypeList:
        print('Warning: only ', ptypeList, 'available for binomial models; '
              'deviance'
              ' used')
        ptype = 'deviance'

    prob_min = 1.0e-5
    prob_max = 1 - prob_min
    if y.shape[1] == 1:
        # Flatten first: on a 2-D input NumPy >= 2 returns an inverse with the
        # input's shape, which would give the one-hot matrix an extra axis.
        classes, sy = numpy.unique(numpy.ravel(y), return_inverse=True)
        nc = len(classes)
        y = numpy.eye(nc, nc)[sy, :]

    # Number of observations (rows), not the number of matrix cells.
    nobs = y.shape[0]
    nfolds = numpy.amax(foldid) + 1
    # Must test ``ptype``; the builtin ``type`` never equals 'auc'.
    if (nobs / nfolds < 10) and (ptype == 'auc'):
        print(
            'Warning: Too few (<10) observations per fold for type.measure=auc in cvlognet'
        )
        print(
            'Warning:     changed to type.measure = deviance. Alternately, use smaller value '
        )
        print('Warning:     for nfolds')
        ptype = 'deviance'

    if (nobs / nfolds < 3) and grouped:
        print(
            'Warning: option grouped = False enforced in cvglmnet as there are < 3 observations per fold'
        )
        grouped = False

    is_offset = len(offset) > 0
    # NaN marks (observation, lambda) cells that a fold never predicted.
    predmat = numpy.full([nobs, lambdau.size], numpy.nan)
    nlams = []
    for i in range(nfolds):
        which = foldid == i
        fitobj = fit[i].copy()
        if is_offset:
            off_sub = offset[which, ]
        else:
            off_sub = numpy.empty([0])
        preds = glmnetPredict(fitobj, x[which, ], numpy.empty([0]), 'response',
                              False, off_sub)
        nlami = numpy.size(fit[i]['lambdau'])
        predmat[which, 0:nlami] = preds
        nlams.append(nlami)
    # convert nlams to a numpy array
    nlams = numpy.array(nlams, dtype=int)

    if ptype == 'auc':
        # AUC is computed once per fold, so cvraw is already fold-level.
        cvraw = numpy.full([nfolds, lambdau.size], numpy.nan)
        good = numpy.zeros([nfolds, lambdau.size])
        sweights = numpy.zeros([nfolds, 1])
        for i in range(nfolds):
            good[i, 0:nlams[i]] = 1
            which = foldid == i
            for j in range(nlams[i]):
                cvraw[i, j] = auc_mat(y[which, ], predmat[which, j],
                                      weights[which])
            sweights[i] = numpy.sum(weights[which], axis=0)
        N = numpy.sum(good, axis=0)
        weights = sweights
    else:
        ywt = numpy.sum(y, axis=1, keepdims=True)
        y = y / ywt
        weights = (weights * ywt.T).T
        N = nobs - numpy.sum(numpy.isnan(predmat), axis=0, keepdims=True)
        yy1 = numpy.tile(y[:, 0:1], [1, lambdau.size])
        yy2 = numpy.tile(y[:, 1:2], [1, lambdau.size])

        # predmat is treated as P(second class), so the first column is
        # compared with 1 - predmat and the second with predmat itself.
        if ptype == 'mse':
            cvraw = (yy1 - (1 - predmat))**2 + (yy2 - predmat)**2
        elif ptype == 'deviance':
            predmat = numpy.clip(predmat, prob_min, prob_max)
            lp = yy1 * numpy.log(1 - predmat) + yy2 * numpy.log(predmat)
            # log(0) is replaced by 0 below; silence the intermediate warnings.
            with numpy.errstate(divide='ignore', invalid='ignore'):
                ly = numpy.log(y)
            ly[y == 0] = 0
            ly = numpy.sum(y * ly, axis=1, keepdims=True)
            cvraw = 2 * (numpy.tile(ly, [1, lambdau.size]) - lp)
        elif ptype == 'mae':
            cvraw = (numpy.absolute(yy1 - (1 - predmat)) +
                     numpy.absolute(yy2 - predmat))
        elif ptype == 'class':
            cvraw = yy1 * (predmat > 0.5) + yy2 * (predmat <= 0.5)

        # Fold-level aggregation only applies to per-observation losses; the
        # AUC branch above already produced one row per fold.
        if grouped:
            cvob = cvcompute(cvraw, weights, foldid, nlams)
            cvraw = cvob['cvraw']
            weights = cvob['weights']
            N = cvob['N']

    cvm = wtmean(cvraw, weights)
    sqccv = (cvraw - cvm)**2
    cvsd = numpy.sqrt(wtmean(sqccv, weights) / (N - 1))

    result = dict()
    result['cvm'] = cvm
    result['cvsd'] = cvsd
    result['name'] = typenames[ptype]

    if keep:
        result['fit_preval'] = predmat

    return result
示例#3
0
def cvfishnet(fit,
              lambdau,
              x,
              y,
              weights,
              offset,
              foldid,
              ptype,
              grouped,
              keep=False):
    """Compute the cross-validation error curve for Poisson fits.

    For every fold the held-out rows of ``x`` are predicted with that fold's
    model through ``glmnetPredict``. The requested loss is evaluated per
    observation and per lambda, then summarised by its weighted mean and
    standard error.

    Parameters
    ----------
    fit : sequence of dict
        One fitted model per fold; ``fit[i]['lambdau']`` holds the lambda
        values that fold actually reached.
    lambdau : numpy.ndarray
        Full lambda path; ``lambdau.size`` is the width of the result.
    x : array
        Predictors, row-indexed with a boolean fold mask.
    y : numpy.ndarray
        Response, tiled across the lambda path.
        NOTE(review): the tiling assumes an ``(nobs, 1)`` column - confirm.
    weights : numpy.ndarray
        Observation weights passed to ``wtmean`` / ``cvcompute``.
    offset : array
        Empty when no offset is used, otherwise indexed like the rows of ``x``.
    foldid : numpy.ndarray
        Zero-based fold index of every observation.
    ptype : str
        ``'default'``, ``'deviance'``, ``'mse'`` or ``'mae'``; anything else
        falls back to ``'deviance'`` with a printed warning.
    grouped : bool
        When true the per-observation losses are aggregated per fold by
        ``cvcompute`` before averaging.
    keep : bool, optional
        When true the prediction matrix is returned as ``'fit_preval'``.

    Returns
    -------
    dict
        ``'cvm'``, ``'cvsd'``, ``'name'`` and optionally ``'fit_preval'``.
    """
    # Function-scope import: the scipy.* aliases of NumPy functions that this
    # routine used to call are deprecated in SciPy.
    import numpy

    typenames = {
        'deviance': 'Poisson Deviance',
        'mse': 'Mean-Squared Error',
        'mae': 'Mean Absolute Error'
    }
    if ptype == 'default':
        ptype = 'deviance'

    ptypeList = ['mse', 'mae', 'deviance']
    if ptype not in ptypeList:
        print('Warning: only ', ptypeList, 'available for Poisson models; '
              'deviance'
              ' used')
        ptype = 'deviance'

    is_offset = len(offset) > 0

    # NaN marks (observation, lambda) cells that a fold never predicted.
    predmat = numpy.full([y.size, lambdau.size], numpy.nan)
    nfolds = numpy.amax(foldid) + 1
    nlams = []
    for i in range(nfolds):
        which = foldid == i
        fitobj = fit[i].copy()
        if is_offset:
            off_sub = offset[which]
        else:
            off_sub = numpy.empty([0])
        preds = glmnetPredict(fitobj, x[which, ], offset=off_sub)
        nlami = numpy.size(fit[i]['lambdau'])
        predmat[which, 0:nlami] = preds
        nlams.append(nlami)
    # convert nlams to a numpy array
    nlams = numpy.array(nlams, dtype=int)

    # Observations available at each lambda.
    N = y.shape[0] - numpy.sum(numpy.isnan(predmat), axis=0)
    yy = numpy.tile(y, [1, lambdau.size])

    if ptype == 'mse':
        cvraw = (yy - predmat)**2
    elif ptype == 'deviance':
        cvraw = devi(yy, predmat)
    elif ptype == 'mae':
        cvraw = numpy.absolute(yy - predmat)

    if y.size / nfolds < 3 and grouped:
        print(
            'Option grouped=false enforced in cvglmnet, since < 3 observations per fold'
        )
        grouped = False

    if grouped:
        cvob = cvcompute(cvraw, weights, foldid, nlams)
        cvraw = cvob['cvraw']
        weights = cvob['weights']
        N = cvob['N']

    cvm = wtmean(cvraw, weights)
    sqccv = (cvraw - cvm)**2
    cvsd = numpy.sqrt(wtmean(sqccv, weights) / (N - 1))

    result = dict()
    result['cvm'] = cvm
    result['cvsd'] = cvsd
    result['name'] = typenames[ptype]

    if keep:
        result['fit_preval'] = predmat

    return result
示例#4
0
def cvmrelnet(fit,
              lambdau,
              x,
              y,
              weights,
              offset,
              foldid,
              ptype,
              grouped,
              keep=False):
    """Compute the cross-validation error curve for multi-response fits.

    For every fold the held-out rows of ``x`` are predicted with that fold's
    model through ``glmnetPredict``. The loss is summed over the response
    columns for each observation and lambda, then summarised by its weighted
    mean and standard error.

    Parameters
    ----------
    fit : sequence of dict
        One fitted model per fold; ``fit[i]['lambdau']`` holds the lambda
        values that fold actually reached.
    lambdau : numpy.ndarray
        Full lambda path; ``lambdau.size`` is the width of the result.
    x : array
        Predictors, row-indexed with a boolean fold mask.
    y : numpy.ndarray (2-D)
        ``nobs x nc`` response matrix; a non-empty ``offset`` is subtracted
        from it before the loss is computed.
    weights : numpy.ndarray
        Observation weights passed to ``wtmean`` / ``cvcompute``.
    offset : array
        Empty when no offset is used.
    foldid : numpy.ndarray
        Zero-based fold index of every observation.
    ptype : str
        ``'default'``, ``'mse'``, ``'deviance'`` (same as ``'mse'``) or
        ``'mae'``; anything else falls back to ``'mse'`` with a warning.
    grouped : bool
        When true the per-observation losses are aggregated per fold by
        ``cvcompute`` before averaging.
    keep : bool, optional
        When true the prediction array is returned as ``'fit_preval'``.

    Returns
    -------
    dict
        ``'cvm'``, ``'cvsd'``, ``'name'`` and optionally ``'fit_preval'``.
    """
    # Function-scope import: the scipy.* aliases of NumPy functions that this
    # routine used to call are deprecated in SciPy.
    import numpy

    typenames = {'deviance': 'Mean-Squared Error', 'mse': 'Mean-Squared Error',
                 'mae': 'Mean Absolute Error'}
    if ptype == 'default':
        ptype = 'mse'

    ptypeList = ['mse', 'mae', 'deviance']
    if ptype not in ptypeList:
        print('Warning: only ', ptypeList, 'available for Gaussian models; ''mse'' used')
        ptype = 'mse'

    nobs, nc = y.shape

    if len(offset) > 0:
        y = y - offset

    # NaN marks (observation, lambda) cells that a fold never predicted.
    predmat = numpy.full([nobs, nc, lambdau.size], numpy.nan)
    nfolds = numpy.amax(foldid) + 1
    nlams = []
    for i in range(nfolds):
        which = foldid == i
        fitobj = fit[i].copy()
        fitobj['offset'] = False
        preds = glmnetPredict(fitobj, x[which, ])
        nlami = numpy.size(fit[i]['lambdau'])
        # The lambda path is the LAST axis; the response axis is taken whole.
        predmat[which, :, 0:nlami] = preds
        nlams.append(nlami)
    # convert nlams to a numpy array
    nlams = numpy.array(nlams, dtype=int)

    # Every response column shares the same NaN pattern; use the first one so
    # a single-column response also works.
    N = nobs - numpy.reshape(numpy.sum(numpy.isnan(predmat[:, 0, :]), axis=0), (1, -1))
    bigY = numpy.tile(y[:, :, None], [1, 1, lambdau.size])

    # 'deviance' is labelled 'Mean-Squared Error' in typenames, so it is
    # computed exactly like 'mse' (previously cvraw was left undefined).
    # Results stay 2-D (nobs, nlambda): no squeeze.
    if ptype in ('mse', 'deviance'):
        cvraw = numpy.sum((bigY - predmat)**2, axis=1)
    elif ptype == 'mae':
        cvraw = numpy.sum(numpy.absolute(bigY - predmat), axis=1)

    # Count observations (rows), not matrix cells, per fold.
    if nobs / nfolds < 3 and grouped:
        print('Option grouped=false enforced in cv.glmnet, since < 3 observations per fold')
        grouped = False

    if grouped:
        cvob = cvcompute(cvraw, weights, foldid, nlams)
        cvraw = cvob['cvraw']
        weights = cvob['weights']
        N = cvob['N']

    cvm = wtmean(cvraw, weights)
    sqccv = (cvraw - cvm)**2
    cvsd = numpy.sqrt(wtmean(sqccv, weights) / (N - 1))

    result = dict()
    result['cvm'] = cvm
    result['cvsd'] = cvsd
    result['name'] = typenames[ptype]

    if keep:
        result['fit_preval'] = predmat

    return result