def cvmultnet(fit,
              lambdau,
              x,
              y,
              weights,
              offset,
              foldid,
              ptype,
              grouped,
              keep=False):
    """Summarise cross-validated loss for a multinomial model.

    For every fold the held-out rows of ``x`` are passed to
    ``glmnetPredict`` and the result is stored in an
    (observations x classes x lambdas) array.  That array is turned into a
    per-observation loss chosen by ``ptype`` and reduced with ``wtmean``
    (after ``cvcompute`` when ``grouped == True``).

    Parameters
    ----------
    fit : sequence indexed by fold number
        Each element must support ``.copy()`` and ``['lambdau']``.
    lambdau : array
        Only ``lambdau.size`` is used; it fixes the lambda axis length.
    x : array
        Indexed with a boolean row mask per fold.
    y : 2-D array
        A single column is treated as class labels and expanded to an
        indicator matrix; otherwise the columns are used as given.  Rows
        are rescaled to sum to one.
    weights : array
        One weight per row of ``y`` (1-D or a single column); multiplied
        by the row totals of ``y``.
    offset : sequence
        Empty for "no offset", otherwise indexed with the fold row mask.
    foldid : integer array
        Fold label per observation; folds ``0 .. max(foldid)`` are visited.
    ptype : str
        ``'default'``, ``'deviance'``, ``'mse'``, ``'mae'`` or ``'class'``.
        Anything else prints a warning and falls back to ``'deviance'``.
    grouped : bool
        When ``== True`` the raw losses go through ``cvcompute`` first.
    keep : bool, optional
        When true the prediction array is returned as ``'fit_preval'``.

    Returns
    -------
    dict
        Keys ``'cvm'``, ``'cvsd'``, ``'name'`` and, if ``keep``,
        ``'fit_preval'``.
    """
    typenames = {
        'deviance': 'Multinomial Deviance',
        'mse': 'Mean-Squared Error',
        'mae': 'Mean Absolute Error',
        'class': 'Misclassification Error'
    }
    if ptype == 'default':
        ptype = 'deviance'

    ptypeList = ['mse', 'mae', 'deviance', 'class']
    if ptype not in ptypeList:
        print('Warning: only ', ptypeList,
              'available for multinomial models; deviance used')
        ptype = 'deviance'

    prob_min = 1.0e-5
    prob_max = 1 - prob_min

    if y.shape[1] == 1:
        # Flatten before unique(): on NumPy >= 2.0 the inverse index keeps
        # the input's 2-D shape, which would break the row lookup below.
        classes, sy = numpy.unique(numpy.ravel(y), return_inverse=True)
        nc = len(classes)
        y = numpy.eye(nc, nc)[sy, :]
    else:
        nc = y.shape[1]

    is_offset = len(offset) != 0
    # numpy.nan instead of numpy.NAN: the upper-case aliases were removed
    # in NumPy 2.0.
    predmat = numpy.full([y.shape[0], nc, lambdau.size], numpy.nan)
    nfolds = numpy.amax(foldid) + 1
    nlams = []
    for i in range(nfolds):
        which = foldid == i
        fitobj = fit[i].copy()
        if is_offset:
            off_sub = offset[which, ]
        else:
            off_sub = numpy.empty([0])
        preds = glmnetPredict(fitobj, x[which, ], numpy.empty([0]),
                              'response', False, off_sub)
        nlami = numpy.size(fit[i]['lambdau'])
        # The lambda axis is the LAST axis of predmat (it is subtracted from
        # bigY, which is obs x class x lambda).  The previous
        # ``predmat[which, 0:nlami]`` sliced the class axis and only worked
        # when the fold's path had the full ``lambdau.size`` entries.
        predmat[which, :, 0:nlami] = preds
        nlams.append(nlami)
    # Plain ``int`` dtype: passing the abstract numpy.integer is deprecated.
    nlams = numpy.array(nlams, dtype=int)

    ywt = numpy.sum(y, axis=1, keepdims=True)
    y = y / numpy.tile(ywt, [1, y.shape[1]])
    # Force a column so 1-D weights do not broadcast to an (n, n) matrix.
    weights = numpy.reshape(weights, [-1, 1]) * ywt
    # Count non-missing predictions per lambda.  Every class slice has the
    # same NaN pattern, so the first class is used (works for any nc >= 1).
    N = y.shape[0] - numpy.sum(
        numpy.isnan(predmat[:, 0, :]), axis=0, keepdims=True)
    bigY = numpy.tile(y[:, :, None], [1, 1, lambdau.size])

    if ptype == 'mse':
        cvraw = numpy.sum((bigY - predmat)**2, axis=1).squeeze()
    elif ptype == 'deviance':
        predmat = numpy.minimum(numpy.maximum(predmat, prob_min), prob_max)
        # log(0) on zero responses is expected and overwritten right after,
        # so silence the floating-point warnings it would raise.
        with numpy.errstate(divide='ignore', invalid='ignore'):
            lp = bigY * numpy.log(predmat)
            ly = bigY * numpy.log(bigY)
        ly[y == 0] = 0
        cvraw = numpy.sum(2 * (ly - lp), axis=1).squeeze()
    elif ptype == 'mae':
        cvraw = numpy.sum(numpy.absolute(bigY - predmat), axis=1).squeeze()
    elif ptype == 'class':
        # NOTE(review): this branch depends on the exact contracts of
        # glmnet_softmax and sub2ind, which are not visible here; the index
        # arithmetic (flattened yperm with shape (size, 1)) looks like a
        # direct MATLAB port -- confirm it selects the intended entries.
        classid = numpy.full([y.shape[0], lambdau.size], numpy.nan)
        for i in range(lambdau.size):
            classid[:, i] = glmnet_softmax(predmat[:, :, i])
        classid = classid.reshape([classid.size, 1])
        yperm = bigY.transpose((0, 2, 1))
        yperm = yperm.reshape([yperm.size, 1])
        idx = sub2ind(yperm.shape, range(len(classid)), classid.transpose())
        cvraw = numpy.reshape(1 - yperm[idx], [-1, lambdau.size])

    if grouped == True:
        cvob = cvcompute(cvraw, weights, foldid, nlams)
        cvraw = cvob['cvraw']
        weights = cvob['weights']
        N = cvob['N']

    cvm = wtmean(cvraw, weights)
    sqccv = (cvraw - cvm)**2
    cvsd = numpy.sqrt(wtmean(sqccv, weights) / (N - 1))

    result = dict()
    result['cvm'] = cvm
    result['cvsd'] = cvsd
    result['name'] = typenames[ptype]
    if keep:
        result['fit_preval'] = predmat

    return result
def cvlognet(fit,
             lambdau,
             x,
             y,
             weights,
             offset,
             foldid,
             ptype,
             grouped,
             keep=False):
    """Summarise cross-validated loss for a binomial (two-class) model.

    For every fold the held-out rows of ``x`` are passed to
    ``glmnetPredict`` (type ``'response'``) and stored in an
    (observations x lambdas) matrix.  The deviance branch pairs ``predmat``
    with the second column of ``y`` and ``1 - predmat`` with the first, so
    ``predmat`` is treated as the probability of the second class.

    Parameters
    ----------
    fit : sequence indexed by fold number
        Each element must support ``.copy()`` and ``['lambdau']``.
    lambdau : array
        Only ``lambdau.size`` is used; it fixes the number of columns.
    x : array
        Indexed with a boolean row mask per fold.
    y : 2-D array
        A single column is treated as class labels and expanded to an
        indicator matrix; otherwise the columns are used as given.  Outside
        the AUC path, rows are rescaled to sum to one.
    weights : array
        One weight per row of ``y``.
    offset : sequence
        Empty for "no offset", otherwise indexed with the fold row mask.
    foldid : integer array
        Fold label per observation; folds ``0 .. max(foldid)`` are visited.
    ptype : str
        ``'default'``, ``'deviance'``, ``'mse'``, ``'mae'``, ``'auc'`` or
        ``'class'``.  Anything else prints a warning and falls back to
        ``'deviance'``; ``'auc'`` also falls back when there are fewer than
        10 observations per fold.
    grouped : bool
        When true (and there are at least 3 observations per fold) the raw
        losses go through ``cvcompute`` first.  Not applied for ``'auc'``,
        whose raw values are already one row per fold.
    keep : bool, optional
        When true the prediction matrix is returned as ``'fit_preval'``.

    Returns
    -------
    dict
        Keys ``'cvm'``, ``'cvsd'``, ``'name'`` and, if ``keep``,
        ``'fit_preval'``.
    """
    # Local import: the NumPy aliases in the scipy root namespace
    # (scipy.ones, scipy.NAN, ...) are deprecated and missing from recent
    # SciPy releases, so call NumPy directly.
    import numpy

    typenames = {
        'deviance': 'Binomial Deviance',
        'mse': 'Mean-Squared Error',
        'mae': 'Mean Absolute Error',
        'auc': 'AUC',
        'class': 'Misclassification Error'
    }
    if ptype == 'default':
        ptype = 'deviance'

    ptypeList = ['mse', 'mae', 'deviance', 'auc', 'class']
    if ptype not in ptypeList:
        print('Warning: only ', ptypeList,
              'available for binomial models; deviance used')
        ptype = 'deviance'

    prob_min = 1.0e-5
    prob_max = 1 - prob_min

    nc = y.shape[1]
    if nc == 1:
        # Flatten before unique(): on NumPy >= 2.0 the inverse index keeps
        # the input's 2-D shape, which would break the row lookup below.
        classes, sy = numpy.unique(numpy.ravel(y), return_inverse=True)
        nc = len(classes)
        y = numpy.eye(nc, nc)[sy, :]

    # Observations are the ROWS of y; y.size would count every cell of the
    # indicator matrix and make the per-fold thresholds too lenient.
    nobs = y.shape[0]
    nfolds = numpy.amax(foldid) + 1
    # Was ``type == 'auc'``: that compared the builtin ``type`` with a
    # string, so this guard could never trigger.
    if (nobs / nfolds < 10) and (ptype == 'auc'):
        print(
            'Warning: Too few (<10) observations per fold for type.measure=auc in cvlognet'
        )
        print(
            'Warning: changed to type.measure = deviance. Alternately, use smaller value '
        )
        print('Warning: for nfolds')
        ptype = 'deviance'

    if (nobs / nfolds < 3) and grouped:
        print(
            'Warning: option grouped = False enforced in cvglmnet as there are < 3 observations per fold'
        )
        grouped = False

    is_offset = len(offset) != 0
    predmat = numpy.full([nobs, lambdau.size], numpy.nan)
    nlams = []
    for i in range(nfolds):
        which = foldid == i
        fitobj = fit[i].copy()
        if is_offset:
            off_sub = offset[which, ]
        else:
            off_sub = numpy.empty([0])
        preds = glmnetPredict(fitobj, x[which, ], numpy.empty([0]),
                              'response', False, off_sub)
        nlami = numpy.size(fit[i]['lambdau'])
        # Columns past nlami stay NaN when this fold's path is shorter.
        predmat[which, 0:nlami] = preds
        nlams.append(nlami)
    # Plain ``int`` dtype: passing the abstract integer type is deprecated.
    nlams = numpy.array(nlams, dtype=int)

    if ptype == 'auc':
        # One AUC value per (fold, lambda); NaN where the fold has no fit.
        cvraw = numpy.full([nfolds, lambdau.size], numpy.nan)
        good = numpy.zeros([nfolds, lambdau.size])
        for i in range(nfolds):
            good[i, 0:nlams[i]] = 1
            which = foldid == i
            for j in range(nlams[i]):
                cvraw[i, j] = auc_mat(y[which, ], predmat[which, j],
                                      weights[which])
        N = numpy.sum(good, axis=0)
        # Fold-level weights: total observation weight inside each fold.
        sweights = numpy.zeros([nfolds, 1])
        for i in range(nfolds):
            sweights[i] = numpy.sum(weights[foldid == i], axis=0)
        weights = sweights
    else:
        ywt = numpy.sum(y, axis=1, keepdims=True)
        y = y / numpy.tile(ywt, [1, y.shape[1]])
        # Column vector of weights scaled by the row totals of y.  Reshaping
        # first gives the same (n, 1) result for 1-D input as the former
        # ``(weights * ywt.T).T`` and also handles (n, 1) input.
        weights = numpy.reshape(weights, [-1, 1]) * ywt
        N = y.shape[0] - numpy.sum(numpy.isnan(predmat), axis=0,
                                   keepdims=True)
        yy1 = numpy.tile(y[:, 0:1], [1, lambdau.size])
        yy2 = numpy.tile(y[:, 1:2], [1, lambdau.size])

        if ptype == 'mse':
            # Second class is compared with predmat itself (was 1 - predmat,
            # which contradicted the pairing used by the deviance branch).
            cvraw = (yy1 - (1 - predmat))**2 + (yy2 - predmat)**2
        elif ptype == 'deviance':
            predmat = numpy.minimum(numpy.maximum(predmat, prob_min),
                                    prob_max)
            lp = yy1 * numpy.log(1 - predmat) + yy2 * numpy.log(predmat)
            # log(0) on zero responses is expected and overwritten right
            # after, so silence the warning it would raise.
            with numpy.errstate(divide='ignore'):
                ly = numpy.log(y)
            ly[y == 0] = 0
            ly = numpy.dot(y * ly, numpy.array([1.0, 1.0]).reshape([2, 1]))
            cvraw = 2 * (numpy.tile(ly, [1, lambdau.size]) - lp)
        elif ptype == 'mae':
            # Same pairing fix as for 'mse'.
            cvraw = (numpy.absolute(yy1 - (1 - predmat)) +
                     numpy.absolute(yy2 - predmat))
        elif ptype == 'class':
            cvraw = yy1 * (predmat > 0.5) + yy2 * (predmat <= 0.5)
            # Comparisons with NaN are False, which would score a missing
            # prediction as "no error"; keep those cells missing, as the
            # other loss types do.
            cvraw[numpy.isnan(predmat)] = numpy.nan

        # Per-fold aggregation applies to per-observation losses only; the
        # AUC matrix above is already one row per fold.
        if grouped == True:
            cvob = cvcompute(cvraw, weights, foldid, nlams)
            cvraw = cvob['cvraw']
            weights = cvob['weights']
            N = cvob['N']

    cvm = wtmean(cvraw, weights)
    sqccv = (cvraw - cvm)**2
    cvsd = numpy.sqrt(wtmean(sqccv, weights) / (N - 1))

    result = dict()
    result['cvm'] = cvm
    result['cvsd'] = cvsd
    result['name'] = typenames[ptype]
    if keep:
        result['fit_preval'] = predmat

    return result
def cvfishnet(fit,
              lambdau,
              x,
              y,
              weights,
              offset,
              foldid,
              ptype,
              grouped,
              keep=False):
    """Summarise cross-validated loss for a Poisson model.

    For every fold the held-out rows of ``x`` are passed to
    ``glmnetPredict`` (default prediction type, with the fold's offset) and
    stored in an (observations x lambdas) matrix.  That matrix is turned
    into a per-observation loss chosen by ``ptype`` and reduced with
    ``wtmean`` (after ``cvcompute`` when ``grouped == True``).

    Parameters
    ----------
    fit : sequence indexed by fold number
        Each element must support ``.copy()`` and ``['lambdau']``.
    lambdau : array
        Only ``lambdau.size`` is used; it fixes the number of columns.
    x : array
        Indexed with a boolean row mask per fold.
    y : array
        Response; tiled ``lambdau.size`` times along the second axis, so a
        single-column 2-D array is expected -- TODO confirm with callers.
    weights : array
        Passed unchanged to ``cvcompute`` / ``wtmean``.
    offset : sequence
        Empty for "no offset", otherwise indexed with the fold row mask.
    foldid : integer array
        Fold label per observation; folds ``0 .. max(foldid)`` are visited.
    ptype : str
        ``'default'``, ``'deviance'``, ``'mse'`` or ``'mae'``.  Anything
        else prints a warning and falls back to ``'deviance'``.
    grouped : bool
        When ``== True`` (and there are at least 3 values per fold) the raw
        losses go through ``cvcompute`` first.
    keep : bool, optional
        When true the prediction matrix is returned as ``'fit_preval'``.

    Returns
    -------
    dict
        Keys ``'cvm'``, ``'cvsd'``, ``'name'`` and, if ``keep``,
        ``'fit_preval'``.
    """
    # Local import: the NumPy aliases in the scipy root namespace
    # (scipy.ones, scipy.NAN, ...) are deprecated and missing from recent
    # SciPy releases, so call NumPy directly.
    import numpy

    typenames = {
        'deviance': 'Poisson Deviance',
        'mse': 'Mean-Squared Error',
        'mae': 'Mean Absolute Error'
    }
    if ptype == 'default':
        ptype = 'deviance'

    ptypeList = ['mse', 'mae', 'deviance']
    if ptype not in ptypeList:
        print('Warning: only ', ptypeList,
              'available for Poisson models; deviance used')
        ptype = 'deviance'

    is_offset = len(offset) > 0

    predmat = numpy.full([y.size, lambdau.size], numpy.nan)
    nfolds = numpy.amax(foldid) + 1
    nlams = []
    for i in range(nfolds):
        which = foldid == i
        fitobj = fit[i].copy()
        if is_offset:
            off_sub = offset[which]
        else:
            off_sub = numpy.empty([0])
        preds = glmnetPredict(fitobj, x[which, ], offset=off_sub)
        nlami = numpy.size(fit[i]['lambdau'])
        # Columns past nlami stay NaN when this fold's path is shorter.
        predmat[which, 0:nlami] = preds
        nlams.append(nlami)
    # Plain ``int`` dtype: passing the abstract integer type is deprecated.
    nlams = numpy.array(nlams, dtype=int)

    # Number of non-missing predictions per lambda.
    N = y.shape[0] - numpy.sum(numpy.isnan(predmat), axis=0)
    yy = numpy.tile(y, [1, lambdau.size])

    # NOTE(review): no prediction type is passed to glmnetPredict above.  If
    # its default is the linear predictor (log-mean), 'mse' and 'mae' below
    # compare y with values on the log scale -- confirm against
    # glmnetPredict and devi before relying on those two measures.
    if ptype == 'mse':
        cvraw = (yy - predmat)**2
    elif ptype == 'deviance':
        cvraw = devi(yy, predmat)
    elif ptype == 'mae':
        cvraw = numpy.absolute(yy - predmat)

    if y.size / nfolds < 3 and grouped == True:
        print(
            'Option grouped=false enforced in cvglmnet, since < 3 observations per fold'
        )
        grouped = False

    if grouped == True:
        cvob = cvcompute(cvraw, weights, foldid, nlams)
        cvraw = cvob['cvraw']
        weights = cvob['weights']
        N = cvob['N']

    cvm = wtmean(cvraw, weights)
    sqccv = (cvraw - cvm)**2
    cvsd = numpy.sqrt(wtmean(sqccv, weights) / (N - 1))

    result = dict()
    result['cvm'] = cvm
    result['cvsd'] = cvsd
    result['name'] = typenames[ptype]
    if keep:
        result['fit_preval'] = predmat

    return result
def cvmrelnet(fit, lambdau, x, y, weights, offset, foldid, ptype, grouped,
              keep=False):
    """Summarise cross-validated loss for a multi-response Gaussian model.

    For every fold the held-out rows of ``x`` are passed to
    ``glmnetPredict`` (with the fold fit's ``'offset'`` entry set to
    ``False``) and stored in an (observations x responses x lambdas) array.
    The loss is summed over the response axis and reduced with ``wtmean``
    (after ``cvcompute`` when ``grouped == True``).

    Parameters
    ----------
    fit : sequence indexed by fold number
        Each element must support ``.copy()``, item assignment and
        ``['lambdau']``.
    lambdau : array
        Only ``lambdau.size`` is used; it fixes the lambda axis length.
    x : array
        Indexed with a boolean row mask per fold.
    y : 2-D array
        Responses, shape (nobs, nc).
    weights : array
        Passed unchanged to ``cvcompute`` / ``wtmean``.
    offset : sequence
        Empty for "no offset"; otherwise subtracted from ``y`` up front.
    foldid : integer array
        Fold label per observation; folds ``0 .. max(foldid)`` are visited.
    ptype : str
        ``'default'``, ``'mse'``, ``'mae'`` or ``'deviance'``.  Anything
        else prints a warning and falls back to ``'mse'``.  ``'deviance'``
        is reported as (and computed like) mean-squared error.
    grouped : bool
        When ``== True`` (and there are at least 3 observations per fold)
        the raw losses go through ``cvcompute`` first.
    keep : bool, optional
        When true the prediction array is returned as ``'fit_preval'``.

    Returns
    -------
    dict
        Keys ``'cvm'``, ``'cvsd'``, ``'name'`` and, if ``keep``,
        ``'fit_preval'``.
    """
    # Local import: the NumPy aliases in the scipy root namespace
    # (scipy.ones, scipy.NAN, ...) are deprecated and missing from recent
    # SciPy releases, so call NumPy directly.
    import numpy

    typenames = {
        'deviance': 'Mean-Squared Error',
        'mse': 'Mean-Squared Error',
        'mae': 'Mean Absolute Error'
    }
    if ptype == 'default':
        ptype = 'mse'

    ptypeList = ['mse', 'mae', 'deviance']
    if ptype not in ptypeList:
        print('Warning: only ', ptypeList,
              'available for Gaussian models; mse used')
        ptype = 'mse'

    nobs, nc = y.shape

    if len(offset) > 0:
        y = y - offset

    predmat = numpy.full([nobs, nc, lambdau.size], numpy.nan)
    nfolds = numpy.amax(foldid) + 1
    nlams = []
    for i in range(nfolds):
        which = foldid == i
        fitobj = fit[i].copy()
        # y already had the offset removed, so predict without one.
        fitobj['offset'] = False
        preds = glmnetPredict(fitobj, x[which, ])
        nlami = numpy.size(fit[i]['lambdau'])
        # The lambda axis is the LAST axis of predmat (it is subtracted from
        # bigY, which is obs x response x lambda).  The previous
        # ``predmat[which, 0:nlami]`` sliced the response axis and only
        # worked when the fold's path had the full ``lambdau.size`` entries.
        predmat[which, :, 0:nlami] = preds
        nlams.append(nlami)
    # Plain ``int`` dtype: passing the abstract integer type is deprecated.
    nlams = numpy.array(nlams, dtype=int)

    # Count non-missing predictions per lambda.  Every response slice has
    # the same NaN pattern, so the first response is used.
    N = nobs - numpy.reshape(
        numpy.sum(numpy.isnan(predmat[:, 0, :]), axis=0), (1, -1))
    bigY = numpy.tile(y[:, :, None], [1, 1, lambdau.size])

    if ptype in ('mse', 'deviance'):
        # 'deviance' is accepted above and labelled 'Mean-Squared Error' in
        # typenames, but previously had no branch here, leaving cvraw
        # undefined.
        cvraw = numpy.sum((bigY - predmat)**2, axis=1).squeeze()
    elif ptype == 'mae':
        cvraw = numpy.sum(numpy.absolute(bigY - predmat), axis=1).squeeze()

    # Observations are the rows of y; y.size would also count the response
    # columns and make the threshold nc times too lenient.
    if nobs / nfolds < 3 and grouped == True:
        print('Option grouped=false enforced in cv.glmnet, since < 3 observations per fold')
        grouped = False

    if grouped == True:
        cvob = cvcompute(cvraw, weights, foldid, nlams)
        cvraw = cvob['cvraw']
        weights = cvob['weights']
        N = cvob['N']

    cvm = wtmean(cvraw, weights)
    sqccv = (cvraw - cvm)**2
    cvsd = numpy.sqrt(wtmean(sqccv, weights) / (N - 1))

    result = dict()
    result['cvm'] = cvm
    result['cvsd'] = cvsd
    result['name'] = typenames[ptype]
    if keep:
        result['fit_preval'] = predmat

    return result