Пример #1
def getTStat(X,y,alpha,lam,nSamp=100):
	"""Return absolute t-statistics for elastic net coefficients.

	The standard error of each coefficient is estimated by a residual
	bootstrap: the model is fit once at (alpha, lam), the residuals are
	centered and resampled with st.sampleWR, and the model is refit on
	each bootstrap response.  The t-stat is |mean coef / sd coef| over
	the bootstrap fits.

	X     - regressor matrix, nObs by nRegs
	y     - response vector, length nObs
	alpha - elastic net mixing parameter passed to enet.fit
	lam   - the single penalty value passed to enet.fit
	nSamp - number of bootstrap samples

	Returns an array of length nRegs.  Coefficients whose bootstrap sd
	is (numerically) zero have the sd floored at 1E-52, so a regressor
	that is zero on every sample gets a t-stat of 0 rather than nan.
	"""
	nObs,nRegs = X.shape
	nSampF = float(nSamp)

	# the residuals come from a single fit on the observed data
	enm = enet.fit(X,y, alpha,lambdas=[lam])
	yHat = enm.predict(X)[:,0]
	res = y - yHat
	resCent = res-np.mean(res)

	# draw all the bootstrap responses first, one per column
	# (st.sampleWR presumably samples with replacement)
	ySample = np.zeros((nObs,nSamp))
	for i in range(nSamp):
		ySample[:,i] = yHat+st.sampleWR(resCent)

	# running sums of the coefs and their squares
	sc = np.zeros(nRegs)
	sSqc = np.zeros(nRegs)
	for i in range(nSamp):
		# the fit only reports the regressors listed in its indices,
		# so map the coefs back to their original positions
		tmpEnm = enet.fit(X,ySample[:,i], alpha,lambdas=[lam])
		coef = tmpEnm.coef[:,0]
		sc[tmpEnm.indices] = sc[tmpEnm.indices] + coef
		sSqc[tmpEnm.indices] = sSqc[tmpEnm.indices] + coef**2

	# get averages and variances
	aveCoef = sc/nSampF
	# E[c^2]-E[c]^2 can land a hair below zero from rounding; without
	# the clamp sqrt gives nan, and nan slips past the floor below
	varCoef = np.maximum(sSqc/nSampF - aveCoef**2, 0.0)
	sdCoef = np.sqrt(varCoef)

	# due to the sparsity of lasso it is possible for a coef to be
	# zero on all samples, thus a zero st error; floor it so the
	# division below is defined
	sdCoef[sdCoef<1E-52] = 1E-52
	return np.abs(aveCoef/sdCoef)
Пример #2
	def estStErr(self,nSamp=100):
		"""Estimate bootstrap standard errors for the fitted model.

		Reads the fitted state from self (_X, _y, _lam, _yHat,
		_coefIndex, _notEmpty, _alpha), builds nSamp residual
		bootstrap responses and stores the results back on self:

		_ySample                - the bootstrap responses, nObs by nSamp
		_aveErr, _sdErr         - mean and sd of the 10 fold CV error
		_aveNullErr, _sdNullErr - same for the null model

		and, only when the model is not empty:

		_Xhat, _sdXhat          - selected columns of X and their sd
		_aveCoef, _sdCoef       - mean and sd of the refit coefs
		_pSup                   - fraction of samples with a non zero coef

		When the model is empty the null model errors are reported as
		the model errors as well.
		"""
		X = self._X
		y = self._y
		nObs = X.shape[0]
		nSampF = float(nSamp)

		lam = self._lam
		yHat = self._yHat
		coefIndex = self._coefIndex
		notEmpty = self._notEmpty
		alpha = self._alpha

		def sdFromSums(sumSq,ave):
			# E[x^2]-E[x]^2 can dip just below zero from rounding,
			# clamp it so the root is never nan
			return np.sqrt(np.maximum(sumSq/nSampF - ave**2, 0.0))

		# get the bootstrap residual response samples
		res = y - yHat
		resCent = res-np.mean(res)
		ySample = np.zeros((nObs,nSamp))
		self._ySample = ySample
		for i in range(nSamp):
			ySample[:,i] = yHat+st.sampleWR(resCent)

		if notEmpty:
			# working on subset now
			Xhat = X[:,coefIndex]
			self._Xhat = Xhat
			nRegsHat = Xhat.shape[1]
			self._sdXhat = np.sqrt(np.var(Xhat,0))

			# residual bs time
			sumErr = 0
			sumSqErr = 0
			sumNullErr = 0
			sumSqNullErr = 0
			sc = np.zeros(nRegsHat)
			sSqc = np.zeros(nRegsHat)
			sumSup = np.zeros(nRegsHat)

			for i in range(nSamp):
				# cv to get the errors
				err,_,_ = fitSampling(Xhat,ySample[:,i],alpha,10,method='cv',lambdas=[lam])
				sumErr = err.mErr[0] + sumErr
				sumSqErr = err.mErr[0]**2 + sumSqErr
				# cv over this thing to get the null model errors
				nullErr,_ = fitSamplingNull(ySample[:,i],10, method='cv')
				sumNullErr = sumNullErr + nullErr
				sumSqNullErr = sumSqNullErr + nullErr**2
				# need the coef; the reported regressors change from
				# fit to fit so map them back to the original positions
				tmpEnm = enet.fit(Xhat,ySample[:,i], alpha,lambdas=[lam])
				coef = tmpEnm.coef[:,0]
				sc[tmpEnm.indices] = sc[tmpEnm.indices] + coef
				sSqc[tmpEnm.indices] = sSqc[tmpEnm.indices] + coef**2
				# find supports
				occur = np.zeros(len(coef))
				occur[abs(coef)>1E-25] = 1.0
				sumSup[tmpEnm.indices] = sumSup[tmpEnm.indices] + occur

			# get averages and variances
			aveErr = sumErr/nSampF
			self._aveErr = aveErr
			self._sdErr = sdFromSums(sumSqErr,aveErr)
			aveNullErr = sumNullErr/nSampF
			# stored here too so both branches leave the same attributes
			self._aveNullErr = aveNullErr
			self._sdNullErr = sdFromSums(sumSqNullErr,aveNullErr)
			aveCoef = sc/nSampF
			self._aveCoef = aveCoef
			self._sdCoef = sdFromSums(sSqc,aveCoef)
			self._pSup = sumSup/nSampF
		else:
			# empty model: only the null model can be evaluated
			sumNullErr = 0
			sumSqNullErr = 0
			for i in range(nSamp):
				# cv over this thing to get the null model errors
				nullErr,_ = fitSamplingNull(ySample[:,i],10, method='cv')
				sumNullErr = sumNullErr + nullErr
				sumSqNullErr = sumSqNullErr + nullErr**2
			# get averages and variances
			aveNullErr = sumNullErr/nSampF
			sdNullErr = sdFromSums(sumSqNullErr,aveNullErr)
			self._aveNullErr = aveNullErr
			self._sdNullErr = sdNullErr
			# with no regressors the model error is the null error
			self._aveErr = aveNullErr
			self._sdErr = sdNullErr
Пример #3
def estModel(XFull,y,nSamp=100,alphaList=np.array([1]),estErr=True,estImp=False,reduceX=False,params=[]):
	"""Estimate a mean, median and standard deviation
	for an elastic net model using bootstrap.

	Bootstrap resampling is used to select the model parameters,
	then the bs res at these params is used to calculate the
	means and standard errors.  nSamp is used for both.

	If *estErr* then 10 fold CV is used to estimate the
	prediction error at each iteration of the bs.  This is ten
	extra iterations at each bs res sample, but reduces the bias
	in prediction error.  The mean and sd of the CV error is then
	reported, along with the same stats for the null model.

	If *estImp* then two errors are estimated for each selected
	regressor: errOut, the CV error with that regressor removed,
	and errIn, the CV error with that regressor alone.

	If *reduceX* then the bootstrap is run only on the columns
	with a non zero coef in the selected model; the coef stats
	are mapped back to the full column numbering.
	NOTE(review): errOut/errIn are still returned in the reduced
	numbering in that case - confirm what callers expect.

	If *params* has two entries it is taken as the (lambda,alpha)
	model parameters.  In this case model selection is bypassed
	and these params are used.

	Returns (solution, enm) where enm is the selected model and
	solution is a dict with 'aveCoef', 'sdCoef', 'medCoef', 'pSup'
	and 'indices', plus 'aveErr', 'sdErr', 'aveNullErr' and
	'sdNullErr' when estErr is set, plus 'errOut' and 'errIn' when
	estImp is set and at least one regressor is selected.
	"""
	nObs,nRegsFull = XFull.shape
	nSampF = float(nSamp)

	# select full model values
	if len(params)==2:
		lam,alpha = params
		enm = enet.fit(XFull,y,alpha,lambdas=[lam])[0]
	else:
		enm = select(XFull,y,nSamp,alphaList)
	lam = enm.lambdas[0]
	yHat = enm.predict(XFull)
	coefIndex = enm.indices[np.abs(enm.coef)>1E-21]
	alpha = enm.alpha

	# now is when we reduce the x if we need to
	if reduceX:
		nRegs = len(coefIndex)
		if nRegs > 0:
			X = XFull[:,coefIndex]
			nObs, _ = X.shape
	else:
		X = XFull
		nRegs = nRegsFull

	# get the bootstrap residual response samples
	res = y - yHat
	resCent = res-np.mean(res)
	ySample = np.zeros((nObs,nSamp))
	for i in range(nSamp):
		ySample[:,i] = yHat+st.sampleWR(resCent)

	def _sd(sumSq,ave):
		# E[x^2]-E[x]^2 can dip just below zero from rounding,
		# clamp it so the root is never nan
		return np.sqrt(np.maximum(sumSq/nSampF - ave**2, 0.0))

	def _nullErrStats():
		# mean and sd of the null model CV error over the bs responses
		sumNull = 0
		sumSqNull = 0
		for k in range(nSamp):
			nullErr,_ = fitSamplingNull(ySample[:,k],10, method='cv')
			sumNull = sumNull + nullErr
			sumSqNull = sumSqNull + nullErr**2
		ave = sumNull/nSampF
		return ave, _sd(sumSqNull,ave)

	def _cvErrMean(Xsub):
		# mean CV error of a model on Xsub over the bs responses
		total = 0
		for k in range(nSamp):
			err,_,_ = fitSampling(Xsub,ySample[:,k],alpha,10,method='cv',lambdas=[lam])
			total = err.mErr[0] + total
		return total/nSampF

	if nRegs > 0:
		# residual bs time
		if estErr:
			sumErr = 0
			sumSqErr = 0
			sumNullErr = 0
			sumSqNullErr = 0

		sc = np.zeros(nRegs)
		sSqc = np.zeros(nRegs)
		# every coef of every sample, kept for the median
		ac = lil_matrix((nRegs,nSamp))
		sumSup = np.zeros(nRegs)

		for i in range(nSamp):
			# cv to get the errors
			if estErr:
				err,_,_ = fitSampling(X,ySample[:,i],alpha,10,method='cv',lambdas=[lam])
				sumErr = err.mErr[0] + sumErr
				sumSqErr = err.mErr[0]**2 + sumSqErr
				# cv over this thing to get the null model errors
				nullErr,_ = fitSamplingNull(ySample[:,i],10, method='cv')
				sumNullErr = sumNullErr + nullErr
				sumSqNullErr = sumSqNullErr + nullErr**2

			# need the coef; the reported regressors change from fit
			# to fit so map them back to the original positions
			tmpEnm = enet.fit(X,ySample[:,i], alpha,lambdas=[lam])
			coef = tmpEnm.coef[:,0]
			sc[tmpEnm.indices] = sc[tmpEnm.indices] + coef
			sSqc[tmpEnm.indices] = sSqc[tmpEnm.indices] + coef**2
			if len(tmpEnm.indices)>0:
				ac[tmpEnm.indices,i] = tmpEnm.coef
			# find supports
			occur = np.zeros(len(coef))
			occur[abs(coef)>1E-25] = 1.0
			sumSup[tmpEnm.indices] = sumSup[tmpEnm.indices] + occur

		# get averages and variances
		if estErr:
			aveErr = sumErr/nSampF
			sdErr = _sd(sumSqErr,aveErr)
			aveNullErr = sumNullErr/nSampF
			sdNullErr = _sd(sumSqNullErr,aveNullErr)

		aveCoef = sc/nSampF
		sdCoef = _sd(sSqc,aveCoef)
		# np.median of the dense matrix comes back 2-d, flatten it
		medCoef = np.array(np.median(ac.todense(),1))[:,0]
		pSup = sumSup/nSampF
		indices = np.arange(nRegs)[np.abs(medCoef)>1E-21]

		# put it in a dict for simplicity
		solution = {}
		if estErr:
			solution['aveErr'] = aveErr
			solution['sdErr'] = sdErr
			solution['aveNullErr'] = aveNullErr
			solution['sdNullErr'] = sdNullErr
		if reduceX:
			# need to go back to the original indices
			solution['aveCoef'] = np.zeros(nRegsFull)
			solution['sdCoef'] = np.zeros(nRegsFull)
			solution['medCoef'] = np.zeros(nRegsFull)
			solution['pSup'] = np.zeros(nRegsFull)

			solution['aveCoef'][coefIndex] = aveCoef
			solution['sdCoef'][coefIndex] = sdCoef
			solution['medCoef'][coefIndex] = medCoef
			solution['pSup'][coefIndex] = pSup
			solution['indices'] = coefIndex[indices]
		else:
			solution['aveCoef'] = aveCoef
			solution['sdCoef'] = sdCoef
			solution['medCoef'] = medCoef
			solution['pSup'] = pSup
			solution['indices'] = indices

		nRegsHat = len(indices)
		if nRegsHat>0 and estImp:
			Xhat = X[:,indices]
			# lets do the leave one out importance deal
			errOutHat = np.zeros(nRegsHat)
			if nRegsHat>1:
				for j in range(nRegsHat):
					errOutHat[j] = _cvErrMean(np.delete(Xhat,j,axis=1))
			else:
				# a single regressor: leaving it out is the null model.
				# the null error is only accumulated above when estErr
				# is set, so get it here if it is missing
				if not estErr:
					aveNullErr,_ = _nullErrStats()
				errOutHat[0] = aveNullErr

			# lets do leave only one
			errInHat = np.zeros(nRegsHat)
			for j in range(nRegsHat):
				Xprime = np.zeros((nObs,1))
				Xprime[:,0] = Xhat[:,j]
				errInHat[j] = _cvErrMean(Xprime)

			errOut = np.zeros(nRegs)
			errOut[indices] = errOutHat
			solution['errOut'] = errOut
			errIn = np.zeros(nRegs)
			errIn[indices] = errInHat
			solution['errIn'] = errIn
	else:
		# nothing was selected: report the null model and zero coefs
		solution = {}
		if estErr:
			aveNullErr,sdNullErr = _nullErrStats()
			# with no regressors the model error is the null error
			solution['aveErr'] = aveNullErr
			solution['sdErr'] = sdNullErr
			solution['aveNullErr'] = aveNullErr
			solution['sdNullErr'] = sdNullErr

		solution['aveCoef'] = np.zeros(nRegsFull)
		solution['sdCoef'] = np.zeros(nRegsFull)
		solution['medCoef'] = np.zeros(nRegsFull)
		solution['pSup'] = np.zeros(nRegsFull)
		solution['indices'] = np.array([])

	return solution, enm
Пример #4
def runTest(X,y):
	"""Collect four error samples for comparing bs and bs-residual CV.

	Returns a 4 by 250 array whose rows are, in order:
	0 - the bs values at the best lambda on the full X
	1 - CV errors over the bs residual responses on the full X
	2 - the bs values at that lambda on the selected columns of X
	3 - CV errors over the bs residual responses on the selected columns
	"""
	nDraws = 250
	nRows,_ = X.shape

	# selection via bootstrap; keep the lambda with the smallest error
	bsErr,model,bsVals = fitSampling(X,y,1,nDraws,method='bs')
	best = np.argmin(bsErr.mErr)
	bsAll = bsVals[best,:]
	bestLam = model.lambdas[best]
	fitted = model.predict(X)[:,best]
	selected = model.indices

	# bootstrap residual responses, one per column
	resid = y - fitted
	centered = resid-np.mean(resid)
	boot = np.zeros((nRows,nDraws))
	for k in range(nDraws):
		boot[:,k] = fitted+st.sampleWR(centered)

	def cvOverBoot(design):
		# one cv error per bootstrap response, at the fixed lambda
		out = np.zeros(nDraws)
		for k in range(nDraws):
			cvErr,_,_ = fitSampling(design,boot[:,k],1,10,method='cv',lambdas=[bestLam])
			mErr = cvErr.mErr
			# a single lambda went in, so a single error must come out
			if len(mErr)>1:
				raise ValueError('something wrong with bs res cv')
			out[k] = mErr[0]
		return out

	bsResAll = cvOverBoot(X)

	# now repeat on the selected columns only
	Xsel = X[:,selected]
	_,_,bsSub = fitSampling(Xsel,y,1,nDraws,method='bs',lambdas=[bestLam])
	bsResSub = cvOverBoot(Xsel)

	vals = np.zeros((4,nDraws))
	for row,sample in enumerate((bsAll,bsResAll,bsSub,bsResSub)):
		vals[row,:] = sample
	return vals
Пример #5
def run(X,y,name):
	"""Select an elastic net model by bootstrap and compute its stats.

	For each alpha in alphaList the model is fit with fitSampling
	(method='bs') and the (alpha, lambda) pair with the smallest
	error is kept.  Residual bootstrap responses are then built
	from that fit and, when the selected model has at least one non
	zero coef, used to estimate the CV error, the null model error,
	the coef mean/sd, the support frequency, the leave-one-out and
	leave-only-one errors and permutation p-values.  An output file
	named 'SLR2run_'+name+'.dat' is opened for writing at the end.

	NOTE(review): the visible source is incomplete - the function
	stops at a bare ``if notEmpty:`` and nothing is written to or
	closes the opened file.  An ``else:`` also appears to be missing
	before the second null model loop (see the note there).  The
	``print`` statement makes this Python 2 only.
	"""
	nSamp = 100
	alphaList = np.array([1])#np.arange(.1,1.1,.1)
	nObs,nRegs = X.shape
	sdY = np.sqrt(np.var(y))
	# selection via bootstrap: keep the alpha (and the lambda index
	# within it) that gives the smallest error seen so far
	bestMin = 1E10
	for a in alphaList:
		tmpErr,tmpEnm,allVals = fitSampling(X,y,a,nSamp,method='bs')
		tmpErrV = tmpErr.mErr
		tmpMin = np.min(tmpErrV)
		print tmpMin
		if tmpMin < bestMin:
			bestMin = tmpMin
			modelIndex = np.argmin(tmpErrV)
			enm = tmpEnm
			err = tmpErr
			alpha = a
	# important values of the selected model; coefs with magnitude
	# of 1E-21 or less are treated as zero
	lam = enm.lambdas[modelIndex]
	yHat = enm.predict(X)[:,modelIndex]
	intercept = enm.intercept[modelIndex]
	globalCoef = enm.coef[np.abs(enm.coef[:,modelIndex])>1E-21,modelIndex]
	coefIndex = enm.indices[np.abs(enm.coef[:,modelIndex])>1E-21]
	notEmpty = len(coefIndex) > 0

	# get the bootstrap residual response samples, one per column
	res = y - yHat
	resCent = res-np.mean(res)
	ySample = np.zeros((nObs,nSamp))
	for i in range(nSamp):
		resSample = st.sampleWR(resCent)
		ySample[:,i] = yHat+resSample

	# (recomputed; same value as above)
	notEmpty = len(coefIndex) > 0

	if notEmpty:
		# working on subset now
		Xhat = X[:,coefIndex]
		nObs,nRegsHat = Xhat.shape
		sdXhat = np.sqrt(np.var(Xhat,0))

		# residual bs time: running sums for the means and variances
		sumErr = 0
		sumSqErr = 0
		sumNullErr = 0
		sumSqNullErr = 0
		sc = np.zeros(nRegsHat)
		sSqc = np.zeros(nRegsHat)
		sumSup = np.zeros(nRegsHat)

		for i in range(nSamp):
			# cv to get the errors
			err,tmpEnm,tmpallVals = fitSampling(Xhat,ySample[:,i],alpha,10,method='cv',lambdas=[lam])
			sumErr = err.mErr[0] + sumErr
			sumSqErr = err.mErr[0]**2 + sumSqErr
			# cv over this thing to get the null model errors
			nullErr,a = fitSamplingNull(ySample[:,i],10, method='cv')
			sumNullErr = sumNullErr + nullErr
			sumSqNullErr = sumSqNullErr + nullErr**2
			# need the coef
			# they change so we need to map them back to the original
			# (this refit replaces the tmpEnm returned by the cv call)
			tmpEnm = enet.fit(Xhat,ySample[:,i], alpha,lambdas=[lam])
			sc[tmpEnm.indices] = sc[tmpEnm.indices] + tmpEnm.coef[:,0]
			sSqc[tmpEnm.indices] = sSqc[tmpEnm.indices] + tmpEnm.coef[:,0]**2
			# find supports: count the samples where the coef is non zero
			occur = np.zeros(len(tmpEnm.coef[:,0]))
			occur[abs(tmpEnm.coef[:,0])>1E-25] = 1.0
			sumSup[tmpEnm.indices] = sumSup[tmpEnm.indices] + occur

		# get averages and variances (sd as sqrt(E[x^2]-E[x]^2))
		aveErr = sumErr/nSamp
		sdErr = np.sqrt(sumSqErr/nSamp - aveErr**2)
		aveNullErr = sumNullErr/nSamp
		sdNullErr = np.sqrt(sumSqNullErr/nSamp - aveNullErr**2)
		aveCoef = sc/nSamp
		sdCoef = np.sqrt(sSqc/nSamp - aveCoef**2)
		pSup = sumSup/nSamp

		# let do the leave one out importance deal:
		# codN[j] is the mean cv error with column j removed
		codN = np.zeros(nRegsHat) 
		if nRegsHat>1:
			for j in range(nRegsHat):
				Xprime = np.delete(Xhat,j,axis=1)

				# residual bs time
				sumErr = 0
				sumSqErr = 0
				for i in range(nSamp):
					# cv to get the errors
					err,tmpenm,tmpallVals = fitSampling(Xprime,ySample[:,i],alpha,10,method='cv',lambdas=[lam])
					sumErr = err.mErr[0] + sumErr
					sumSqErr = err.mErr[0]**2 + sumSqErr

				codN[j] = sumErr/nSamp

		elif nRegsHat==1:
			# removing the only regressor leaves the null model
			codN[0] = aveNullErr

		# lets do leave only one:
		# cod1[j] is the mean cv error with column j as the only regressor
		cod1 = np.zeros(nRegsHat) 
		for j in range(nRegsHat):
			Xprime = np.zeros((nObs,1))
			Xprime[:,0] = Xhat[:,j]

			# residual bs time
			sumErr = 0
			sumSqErr = 0
			for i in range(nSamp):
				# cv to get the errors
				err,tmpenm,tmpallVals = fitSampling(Xprime,ySample[:,i],alpha,10,method='cv',lambdas=[lam])
				sumErr = err.mErr[0] + sumErr
				sumSqErr = err.mErr[0]**2 + sumSqErr

			cod1[j] = sumErr/nSamp

		# now we are going to estimate
		# some pvalues.  it should
		# be noted: that we want to use
		# permutation, to get a real feel
		# for random or unrelated data 
		# but we dont want to run a bs
		# for each perm (but we should)
		# so in here we are using the 
		# ols stderr to get the test stat
		# we will record a bunch of stuff 
		# from here to look at later
		p,tStat,tStatPerm,olsSE = regStat.netTTestPermute(Xhat,y,lam,alpha,nperm=1000)
		n,m = tStatPerm.shape
		# would like to check if any values are nan
		# this most likely means the gpd failed in goodness of fit for tail
		# will use direct permutation values as the estimate in that case 
		# *** some other form of automated checking might be good here
		for i in range(n):
			if np.isnan(p[i]):
				# fraction of permuted stats above the observed one
				z = tStatPerm[i,:]
				tmp = np.sum(z>tStat[i]) 
				p[i] = float(tmp)/float(m)
		# NOTE(review): the lines below look like the body of an
		# ``else:`` (empty model) that was lost from the source.  As
		# written they run inside the ``if notEmpty:`` branch and
		# overwrite aveErr/sdErr with the null model values, while an
		# empty model leaves all of these names undefined.
		# residual bs time
		sumNullErr = 0
		sumSqNullErr = 0
		for i in range(nSamp):
			# cv over this thing to get the null model errors
			nullErr,a = fitSamplingNull(ySample[:,i],10, method='cv')
			sumNullErr = sumNullErr + nullErr
			sumSqNullErr = sumSqNullErr + nullErr**2
		# get averages and variances
		aveNullErr = sumNullErr/nSamp
		sdNullErr = np.sqrt(sumSqNullErr/nSamp - aveNullErr**2)
		aveErr = aveNullErr
		sdErr = sdNullErr

	# we have it all, lets print it
	# NOTE(review): the file is opened but the visible source never
	# writes to it or closes it - the output code appears to be missing
	f = open('SLR2run_'+name+'.dat','w')




	# NOTE(review): the body of this branch is missing from the source
	if notEmpty:









Пример #6
def estModel(XFull,y,nSamp=100,alphaList=np.array([1]),indType='coef',estErr=True,estImp=True,reduceX=False,params=[]):
	"""Estimate a mean, median and standard deviation
	for an elastic net model using bootstrap.

	Bootstrap resampling is used to select
	model parameters, then the bs res at these
	params is used on the full feature set X
	to calculate the stats.  nSamp is used for
	selection and stat estimates.

	*indType* determines which stat to use for indices.
	Indices report the non zero entries in the sparse
	regression model.  Possible types:
	coef - use coefs from full fit after the selection
	ave - use the average coefs after the bs, typically
	includes many more regressors, not recommended
	as the average removes the sparsity benefit.
	med - use the median value after the bs, typically
	fewer regressors chosen than 'coef'
	Any other value raises ValueError before any fitting.

	If *estErr* then 10 fold CV is used to estimate
	the prediction error at each iteration of the bs.
	This is ten extra iterations at each bs res
	sample, but reduces the bias in prediction error.
	The mean and sd of the CV error is then reported,
	along with the same stats for the null model.

	If *estImp* then the importance of each selected
	regressor is estimated.  For errOut this is the error
	if the regressor is removed, multivariate error.
	For errIn this is the error if the regressor is alone,
	univariate error.

	If *reduceX* then the regressor matrix is reduced
	based on the full model fit after selection.  Only
	non zero coef are kept, much faster, but biases the
	other stats.
	NOTE: This was never tested after the last
	migration, it is possible the indices in the solution
	do not match the original ones.
	NOTE(review): errOut/errIn are still returned in the
	reduced numbering in that case - confirm what callers expect.

	If *params* has two entries it is taken as the
	(lambda,alpha) model parameters.  In this case
	model selection is bypassed and these params are used.

	Returns (solution, enm) where enm is the selected model and
	solution is a dict with 'aveCoef', 'sdCoef', 'medCoef', 'pSup'
	and 'indices', plus 'aveErr', 'sdErr', 'aveNullErr' and
	'sdNullErr' when estErr is set, plus 'errOut' and 'errIn' when
	estImp is set and at least one regressor is selected.
	"""
	# fail before the expensive bootstrap rather than after it
	if indType not in ('coef','med','ave'):
		raise ValueError('The indType '+indType+' is not valid.')

	nObs,nRegsFull = XFull.shape
	nSampF = float(nSamp)

	# select full model values
	if len(params)==2:
		lam,alpha = params
		enm = enet.fit(XFull,y,alpha,lambdas=[lam])[0]
	else:
		enm = select(XFull,y,nSamp,alphaList)
	lam = enm.lambdas[0]
	yHat = enm.predict(XFull)
	coefIndex = enm.indices[np.abs(enm.coef)>1E-21]
	alpha = enm.alpha

	# now is when we reduce the x if we need to
	if reduceX:
		nRegs = len(coefIndex)
		if nRegs > 0:
			X = XFull[:,coefIndex]
			nObs, _ = X.shape
	else:
		X = XFull
		nRegs = nRegsFull

	# get the bootstrap residual response samples
	res = y - yHat
	resCent = res-np.mean(res)
	ySample = np.zeros((nObs,nSamp))
	for i in range(nSamp):
		ySample[:,i] = yHat+st.sampleWR(resCent)

	def _sd(sumSq,ave):
		# E[x^2]-E[x]^2 can dip just below zero from rounding,
		# clamp it so the root is never nan
		return np.sqrt(np.maximum(sumSq/nSampF - ave**2, 0.0))

	def _nullErrStats():
		# mean and sd of the null model CV error over the bs responses
		sumNull = 0
		sumSqNull = 0
		for k in range(nSamp):
			nullErr,_ = fitSamplingNull(ySample[:,k],10, method='cv')
			sumNull = sumNull + nullErr
			sumSqNull = sumSqNull + nullErr**2
		ave = sumNull/nSampF
		return ave, _sd(sumSqNull,ave)

	def _cvErrMean(Xsub):
		# mean CV error of a model on Xsub over the bs responses
		total = 0
		for k in range(nSamp):
			err,_,_ = fitSampling(Xsub,ySample[:,k],alpha,10,method='cv',lambdas=[lam])
			total = err.mErr[0] + total
		return total/nSampF

	if nRegs > 0:
		# residual bs time
		if estErr:
			sumErr = 0
			sumSqErr = 0
			sumNullErr = 0
			sumSqNullErr = 0

		sc = np.zeros(nRegs)
		sSqc = np.zeros(nRegs)
		# every coef of every sample, kept for the median
		ac = lil_matrix((nRegs,nSamp))
		sumSup = np.zeros(nRegs)

		for i in range(nSamp):
			# cv to get the errors
			if estErr:
				err,_,_ = fitSampling(X,ySample[:,i],alpha,10,method='cv',lambdas=[lam])
				sumErr = err.mErr[0] + sumErr
				sumSqErr = err.mErr[0]**2 + sumSqErr
				# cv over this thing to get the null model errors
				nullErr,_ = fitSamplingNull(ySample[:,i],10, method='cv')
				sumNullErr = sumNullErr + nullErr
				sumSqNullErr = sumSqNullErr + nullErr**2

			# need the coef; the reported regressors change from fit
			# to fit so map them back to the original positions
			tmpEnm = enet.fit(X,ySample[:,i], alpha,lambdas=[lam])
			coef = tmpEnm.coef[:,0]
			sc[tmpEnm.indices] = sc[tmpEnm.indices] + coef
			sSqc[tmpEnm.indices] = sSqc[tmpEnm.indices] + coef**2
			if len(tmpEnm.indices)>0:
				ac[tmpEnm.indices,i] = tmpEnm.coef
			# find supports
			occur = np.zeros(len(coef))
			occur[abs(coef)>1E-25] = 1.0
			sumSup[tmpEnm.indices] = sumSup[tmpEnm.indices] + occur

		# get averages and variances
		if estErr:
			aveErr = sumErr/nSampF
			sdErr = _sd(sumSqErr,aveErr)
			aveNullErr = sumNullErr/nSampF
			sdNullErr = _sd(sumSqNullErr,aveNullErr)

		aveCoef = sc/nSampF
		sdCoef = _sd(sSqc,aveCoef)
		# np.median of the dense matrix comes back 2-d, flatten it
		medCoef = np.array(np.median(ac.todense(),1))[:,0]
		pSup = sumSup/nSampF

		# lets do the selection; indices are in the numbering of X
		if indType=='coef':
			if reduceX:
				# X already holds only the selected columns, so in
				# the reduced numbering every column is selected
				indices = np.arange(nRegs)
			else:
				indices = coefIndex
		elif indType=='med':
			indices = np.arange(nRegs)[np.abs(medCoef)>1E-21]
		else:
			# indType=='ave', checked at the top
			indices = np.arange(nRegs)[np.abs(aveCoef)>1E-21]

		# put it in a dict for simplicity
		solution = {}
		if estErr:
			solution['aveErr'] = aveErr
			solution['sdErr'] = sdErr
			solution['aveNullErr'] = aveNullErr
			solution['sdNullErr'] = sdNullErr
		if reduceX:
			# need to go back to the original indices
			solution['aveCoef'] = np.zeros(nRegsFull)
			solution['sdCoef'] = np.zeros(nRegsFull)
			solution['medCoef'] = np.zeros(nRegsFull)
			solution['pSup'] = np.zeros(nRegsFull)

			solution['aveCoef'][coefIndex] = aveCoef
			solution['sdCoef'][coefIndex] = sdCoef
			solution['medCoef'][coefIndex] = medCoef
			solution['pSup'][coefIndex] = pSup
			solution['indices'] = coefIndex[indices]
		else:
			solution['aveCoef'] = aveCoef
			solution['sdCoef'] = sdCoef
			solution['medCoef'] = medCoef
			solution['pSup'] = pSup
			solution['indices'] = indices

		nRegsHat = len(indices)
		if nRegsHat>0 and estImp:
			Xhat = X[:,indices]
			# lets do the leave one out importance deal
			errOutHat = np.zeros(nRegsHat)
			if nRegsHat>1:
				for j in range(nRegsHat):
					errOutHat[j] = _cvErrMean(np.delete(Xhat,j,axis=1))
			else:
				# a single regressor: leaving it out is the null model.
				# the null error is only accumulated above when estErr
				# is set, so get it here if it is missing
				if not estErr:
					aveNullErr,_ = _nullErrStats()
				errOutHat[0] = aveNullErr

			# lets do leave only one
			errInHat = np.zeros(nRegsHat)
			for j in range(nRegsHat):
				Xprime = np.zeros((nObs,1))
				Xprime[:,0] = Xhat[:,j]
				errInHat[j] = _cvErrMean(Xprime)

			errOut = np.zeros(nRegs)
			errOut[indices] = errOutHat
			solution['errOut'] = errOut
			errIn = np.zeros(nRegs)
			errIn[indices] = errInHat
			solution['errIn'] = errIn
	else:
		# nothing was selected: report the null model and zero coefs
		solution = {}
		if estErr:
			aveNullErr,sdNullErr = _nullErrStats()
			# with no regressors the model error is the null error
			solution['aveErr'] = aveNullErr
			solution['sdErr'] = sdNullErr
			solution['aveNullErr'] = aveNullErr
			solution['sdNullErr'] = sdNullErr

		solution['aveCoef'] = np.zeros(nRegsFull)
		solution['sdCoef'] = np.zeros(nRegsFull)
		solution['medCoef'] = np.zeros(nRegsFull)
		solution['pSup'] = np.zeros(nRegsFull)
		solution['indices'] = np.array([])

	return solution, enm