def _get_spectra(self, with_gradient=False):
    """Build a synthetic Frame from ``self.wave`` and ``self.flux``.

    Every spectrum is ``self.flux`` convolved with a per-fiber Gaussian
    resolution kernel whose centre drifts slightly with the fiber index.
    The inverse variance is 1 plus a uniform random component in
    [0, 0.4), the mask is all zeros, and every other fibermap entry is
    flagged as 'SKY'.

    Args:
        with_gradient: when True, the input flux of each fiber is scaled
            by a term linear in the standardized FIBERASSIGN_X/Y fiber
            coordinates and in the normalized wavelength before the
            convolution.

    Returns:
        Frame built with spectrograph=2, the fibermap and
        meta={"camera": "r2"}.
    """
    nspec, nwave = self.nspec, self.nwave

    #- Resolution data: one normalized kernel per fiber, identical at
    #- every wavelength of that fiber
    variance = 4.0
    ndiag = 21
    half_width = (ndiag - 1) / 2.0
    offsets = np.linspace(-half_width, +half_width, ndiag)
    Rdata = np.zeros((nspec, ndiag, nwave))
    for ispec in range(nspec):
        shift = float(ispec) / nspec * 0.3
        profile = np.exp(-(offsets + shift)**2 / (2 * variance))
        profile /= sum(profile)
        # broadcast the (ndiag,) kernel over the wavelength axis
        Rdata[ispec] = profile[:, np.newaxis]

    flux = np.zeros((nspec, nwave), dtype=float)

    #- unit inverse variance plus a random component, drawn row by row
    ivar = np.ones((nspec, nwave), dtype=float)
    for row in ivar:
        row += 0.4 * np.random.uniform(size=nwave)

    mask = np.zeros((nspec, nwave), dtype=int)

    fibermap = empty_fibermap(nspec, 1500)
    fibermap['OBJTYPE'][0::2] = 'SKY'

    def _standardize(values):
        # centre on the mean; scale to unit std unless the std is zero
        centered = values - np.mean(values)
        spread = np.std(centered)
        if spread > 0:
            centered /= spread
        return centered

    x = _standardize(fibermap["FIBERASSIGN_X"])
    y = _standardize(fibermap["FIBERASSIGN_Y"])

    #- wavelength mapped linearly onto [-1, 1]
    wave_span = self.wave[-1] - self.wave[0]
    w = (self.wave - self.wave[0]) / wave_span * 2. - 1

    for ispec in range(nspec):
        resolution = Resolution(Rdata[ispec])
        if not with_gradient:
            flux[ispec] = resolution.dot(self.flux)
            continue
        gradient = 1. + (0.1 * x[ispec] + 0.2 * y[ispec]) * (1 + 0.4 * w)
        flux[ispec] = resolution.dot(gradient * self.flux)

    return Frame(self.wave, flux, ivar, mask, Rdata,
                 spectrograph=2, fibermap=fibermap,
                 meta={"camera": "r2"})
def _get_spectra(self):
    """Build a synthetic Frame from ``self.wave`` and ``self.flux``.

    Every spectrum is ``self.flux`` convolved with the same normalized
    Gaussian resolution kernel. The inverse variance is 1 everywhere,
    the mask is all zeros, and every other fibermap entry is flagged as
    'SKY'.

    Returns:
        Frame built with spectrograph=2 and the fibermap attached.
    """
    #- Setup data for a Resolution matrix
    # sigma2 enters the kernel as a variance: exp(-xx**2 / (2 * sigma2))
    sigma2 = 4.0
    ndiag = 21
    xx = np.linspace(-(ndiag - 1) / 2.0, +(ndiag - 1) / 2.0, ndiag)

    # The kernel does not depend on the fiber or the wavelength, so it is
    # computed once and broadcast instead of being rebuilt in a double loop.
    kernel = np.exp(-xx**2 / (2 * sigma2))
    kernel /= sum(kernel)
    Rdata = np.zeros((self.nspec, ndiag, self.nwave))
    Rdata[:, :, :] = kernel[:, np.newaxis]

    flux = np.zeros((self.nspec, self.nwave))
    ivar = np.ones((self.nspec, self.nwave))
    mask = np.zeros((self.nspec, self.nwave), dtype=int)
    for i in range(self.nspec):
        R = Resolution(Rdata[i])
        flux[i] = R.dot(self.flux)

    fibermap = desispec.io.empty_fibermap(self.nspec, 1500)
    fibermap['OBJTYPE'][0::2] = 'SKY'

    return Frame(self.wave, flux, ivar, mask, Rdata,
                 spectrograph=2, fibermap=fibermap)
def _get_spectra(self, with_gradient=False):
    """Return a synthetic Frame derived from ``self.wave``/``self.flux``.

    Each fiber gets its own normalized Gaussian resolution kernel (the
    kernel centre moves slightly with the fiber index) and its spectrum
    is ``self.flux`` convolved with that kernel. Inverse variances are
    1 plus a uniform random term in [0, 0.4); the mask is zero; every
    other fibermap row has OBJTYPE set to 'SKY'.

    Args:
        with_gradient: if True, multiply the input flux of each fiber by
            a factor linear in the standardized DESIGN_X/DESIGN_Y
            coordinates and in the normalized wavelength before
            convolving.

    Returns:
        Frame built with spectrograph=2 and the fibermap attached.
    """
    shape2d = (self.nspec, self.nwave)

    #- Setup data for a Resolution matrix
    sigma2 = 4.0
    ndiag = 21
    xx = np.linspace(-(ndiag - 1) / 2.0, +(ndiag - 1) / 2.0, ndiag)
    Rdata = np.zeros((self.nspec, ndiag, self.nwave))
    for ifiber in range(self.nspec):
        center_shift = float(ifiber) / self.nspec * 0.3
        gauss = np.exp(-(xx + center_shift)**2 / (2 * sigma2))
        gauss /= sum(gauss)
        # same kernel at every wavelength of this fiber
        Rdata[ifiber, :, :] = gauss[:, None]

    flux = np.zeros(shape2d, dtype=float)
    ivar = np.ones(shape2d, dtype=float)
    # Add a random component, one draw of nwave values per fiber
    for ifiber in range(self.nspec):
        noise = np.random.uniform(size=self.nwave)
        ivar[ifiber] += 0.4 * noise
    mask = np.zeros(shape2d, dtype=int)

    fibermap = desispec.io.empty_fibermap(self.nspec, 1500)
    fibermap['OBJTYPE'][0::2] = 'SKY'

    # centred fiber coordinates, scaled to unit std when the std is non-zero
    x = fibermap["DESIGN_X"] - np.mean(fibermap["DESIGN_X"])
    y = fibermap["DESIGN_Y"] - np.mean(fibermap["DESIGN_Y"])
    x_std = np.std(x)
    if x_std > 0:
        x /= x_std
    y_std = np.std(y)
    if y_std > 0:
        y /= y_std

    # wavelength rescaled linearly to [-1, 1]
    w = (self.wave - self.wave[0]) / (self.wave[-1] - self.wave[0]) * 2. - 1

    for ifiber in range(self.nspec):
        if with_gradient:
            scale = 1. + (0.1 * x[ifiber] + 0.2 * y[ifiber]) * (1 + 0.4 * w)
            template = scale * self.flux
        else:
            template = self.flux
        flux[ifiber] = Resolution(Rdata[ifiber]).dot(template)

    return Frame(self.wave, flux, ivar, mask, Rdata,
                 spectrograph=2, fibermap=fibermap)
def _get_spectra(self):
    """Build a synthetic Frame and a matching fibermap.

    Every spectrum is ``self.flux`` convolved with the same normalized
    Gaussian resolution kernel. The inverse variance is 1 everywhere and
    the mask is all zeros. The fibermap is created separately (it is not
    passed to the Frame) with every other entry flagged as 'SKY'.

    Returns:
        tuple ``(frame, fibermap)``.
    """
    #- Setup data for a Resolution matrix
    # sigma2 enters the kernel as a variance: exp(-xx**2 / (2 * sigma2))
    sigma2 = 4.0
    ndiag = 21
    xx = np.linspace(-(ndiag - 1) / 2.0, +(ndiag - 1) / 2.0, ndiag)

    # The kernel does not depend on the fiber or the wavelength, so it is
    # computed once and broadcast instead of being rebuilt in a double loop.
    kernel = np.exp(-xx**2 / (2 * sigma2))
    kernel /= sum(kernel)
    Rdata = np.zeros((self.nspec, ndiag, self.nwave))
    Rdata[:, :, :] = kernel[:, np.newaxis]

    flux = np.zeros((self.nspec, self.nwave))
    ivar = np.ones((self.nspec, self.nwave))
    mask = np.zeros((self.nspec, self.nwave), dtype=int)
    for i in range(self.nspec):
        R = Resolution(Rdata[i])
        flux[i] = R.dot(self.flux)

    fibermap = desispec.io.empty_fibermap(self.nspec)
    fibermap['OBJTYPE'][0::2] = 'SKY'

    return Frame(self.wave, flux, ivar, mask, Rdata), fibermap
def compute_non_uniform_sky(frame, nsig_clipping=4., max_iterations=10,
                            model_ivar=False, add_variance=True,
                            angular_variation_deg=1):
    """Compute a sky model that varies with the fiber focal plane position.

    Sky[fiber,i] = R[fiber,i,j] ( Flux_0[j] + x[fiber]*Flux_x[j] + y[fiber]*Flux_y[j] + ... )

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]
          - R : per-fiber resolution matrices
          - fibermap : table with OBJTYPE, FIBERASSIGN_X, FIBERASSIGN_Y
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , maximum number of fit/rejection iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated
            flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky
            model variance
        angular_variation_deg : degree of 2D polynomial correction as a
            function of fiber focal plane coordinates (default=1).
            One set of coefficients per wavelength

    Returns:
        SkyModel object with attributes wave, flux, ivar, mask

    Raises:
        ValueError: if no fiber of the fibermap has OBJTYPE == 'SKY'.
    """
    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        # np.max() of an empty selection would raise an opaque ValueError
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame.fibermap")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    # masked pixels get a null weight
    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    # need focal plane coordinates of fibers
    x = frame.fibermap["FIBERASSIGN_X"][skyfibers]
    y = frame.fibermap["FIBERASSIGN_Y"][skyfibers]
    # normalize for numerical stability
    # (statistics are computed on all fibers, not only sky fibers)
    xm = np.mean(frame.fibermap["FIBERASSIGN_X"])
    ym = np.mean(frame.fibermap["FIBERASSIGN_Y"])
    xs = np.std(frame.fibermap["FIBERASSIGN_X"])
    ys = np.std(frame.fibermap["FIBERASSIGN_Y"])
    if xs == 0:
        xs = 1
    if ys == 0:
        ys = 1
    x = (x - xm) / xs
    y = (y - ym) / ys

    # precompute the monomials for the sky fibers
    log.debug("compute monomials for deg={}".format(angular_variation_deg))
    monomials = []
    for dx in range(angular_variation_deg + 1):
        for dy in range(angular_variation_deg + 1 - dx):
            monomials.append((x**dx) * (y**dy))
    ncoef = len(monomials)
    monomials = np.array(monomials)

    input_ivar = None
    if model_ivar:
        log.info(
            "use a model of the inverse variance to remove bias due to correlated ivar and flux"
        )
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        threshold = 0.01
        for f in range(current_ivar.shape[0]):
            current_ivar[f] = median_ivar_vs_fiber[f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    nout_tot = 0
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # with x_fiber,y_fiber the fiber coordinates in the focal plane (or sky)
        # the unconvolved sky flux at wavelength i is a polynomial of x_fiber,y_fiber
        # sky(fiber,i) = pol(x_fiber,y_fiber,p) = sum_p a_ip * x_fiber**degx(p) y_fiber**degy(p)
        # sky(fiber,i) = sum_p monom[fiber,p] * a_ip
        # the convolved sky flux at wavelength w is
        # model[fiber,w] = sum_i R[fiber][w,i] sum_p monom[fiber,p] * a_ip
        # model[fiber,w] = sum_p monom[fiber,p] R[fiber][w,i] a_ip
        #
        # so, the matrix A is composed of blocks (p,k) corresponding to polynomial coefficient indices where
        # A[pk] = sum_fiber monom[fiber,p]*monom[fiber,k] sqrtwR[fiber] sqrtwR[fiber]^t
        # similarly
        # B[p]  = sum_fiber monom[fiber,p] * sum_wave_w (sqrt(ivar)[fiber,w]*flux[fiber,w]) sqrtwR[fiber,wave]

        A = np.zeros((nwave * ncoef, nwave * ncoef))
        B = np.zeros((nwave * ncoef))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber %d/%d" % (iteration, fiber, nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

            wRtR = (sqrtwR.T * sqrtwR).todense()
            wRtF = sqrtwR.T * sqrtwflux[fiber]
            # loop on polynomial coefficients (double loop for A)
            # fill only blocks of A and B
            for p in range(ncoef):
                for k in range(ncoef):
                    A[p * nwave:(p + 1) * nwave, k * nwave:(k + 1) * nwave] += \
                        monomials[p, fiber] * monomials[k, fiber] * wRtR
                B[p * nwave:(p + 1) * nwave] += monomials[p, fiber] * wRtF

        log.info("iter %d solving" % iteration)
        # only solve for parameters that are constrained by the data
        w = A.diagonal() > 0
        A_pos_def = A[w, :]
        A_pos_def = A_pos_def[:, w]
        parameters = B * 0
        try:
            parameters[w] = cholesky_solve(A_pos_def, B[w])
        except Exception:
            # best-effort fallback on any solver error; unlike a bare
            # except this lets KeyboardInterrupt/SystemExit propagate
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[w] = np.linalg.lstsq(A_pos_def, B[w])[0]

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):
            # loop on polynomial indices
            unconvolved_fiber_sky_flux = np.zeros(nwave)
            for p in range(ncoef):
                unconvolved_fiber_sky_flux += monomials[p, fiber] * parameters[p * nwave:(p + 1) * nwave]
            # then convolve
            fiber_convolved_sky_flux = Rsky[fiber].dot(unconvolved_fiber_sky_flux)

            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - fiber_convolved_sky_flux)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # is there a different method to compute this ?
    log.info("compute covariance")
    try:
        parameter_covar = cholesky_invert(A)
    except np.linalg.LinAlgError:
        log.warning(
            "cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv"
        )
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    cskyflux = np.zeros(frame.flux.shape)
    cskyivar = np.zeros(frame.flux.shape)

    log.info("compute convolved parameter covariance")
    # The covariance of the parameters is composed of ncoef*ncoef blocks each of size nwave*nwave
    # A block (p,k) is the covariance of the unconvolved spectra p and k , corresponding to the polynomial indices p and k
    # We first sandwich each block with the average resolution
    # and keep only the diagonal of the result.
    convolved_parameter_covar = np.zeros((ncoef, ncoef, nwave))
    RmeanT = Rmean.T.todense()
    for p in range(ncoef):
        for k in range(ncoef):
            block = parameter_covar[p * nwave:(p + 1) * nwave, k * nwave:(k + 1) * nwave]
            convolved_parameter_covar[p, k] = np.diagonal(Rmean.dot(block).dot(RmeanT))

    # Now we compute the sky model variance for each fiber individually
    # accounting for its focal plane coordinates
    # so that a target fiber distant from a sky fiber will naturally have a larger
    # sky model variance
    log.info("compute sky and variance per fiber")
    for i in range(frame.nspec):
        # compute monomials (same ordering as for the sky fibers above)
        M = []
        xi = (frame.fibermap["FIBERASSIGN_X"][i] - xm) / xs
        yi = (frame.fibermap["FIBERASSIGN_Y"][i] - ym) / ys
        for dx in range(angular_variation_deg + 1):
            for dy in range(angular_variation_deg + 1 - dx):
                M.append((xi**dx) * (yi**dy))
        M = np.array(M)

        unconvolved_fiber_sky_flux = np.zeros(nwave)
        convolved_fiber_skyvar = np.zeros(nwave)
        for p in range(ncoef):
            unconvolved_fiber_sky_flux += M[p] * parameters[p * nwave:(p + 1) * nwave]
            for k in range(ncoef):
                convolved_fiber_skyvar += M[p] * M[k] * convolved_parameter_covar[p, k]

        # convolve sky model with this fiber's resolution
        cskyflux[i] = frame.R[i].dot(unconvolved_fiber_sky_flux)

        # save inverse of variance (0 where the variance is not positive)
        cskyivar[i] = (convolved_fiber_skyvar > 0) / (
            convolved_fiber_skyvar + (convolved_fiber_skyvar == 0))

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar, skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot,
                    stat_ivar=cskyivar)  # keep a record of the statistical ivar for QA
def compute_polynomial_times_sky(frame, nsig_clipping=4., max_iterations=30,
                                 model_ivar=False, add_variance=True,
                                 angular_variation_deg=1,
                                 chromatic_variation_deg=1):
    """Compute a sky model as a mean sky spectrum times a smooth polynomial.

    Sky[fiber,i] = R[fiber,i,j] Polynomial(x[fiber],y[fiber],wavelength[j]) Flux[j]

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]
          - R : per-fiber resolution matrices
          - fibermap : table with OBJTYPE, FIBERASSIGN_X, FIBERASSIGN_Y
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , maximum number of fit/rejection iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated
            flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky
            model variance
        angular_variation_deg : maximum total degree (dx+dy) of the
            polynomial in the normalized fiber coordinates
        chromatic_variation_deg : maximum degree of the polynomial in the
            wavelength rescaled to [-1, 1]

    Returns:
        SkyModel object with attributes wave, flux, ivar, mask

    Raises:
        ValueError: if no fiber of the fibermap has OBJTYPE == 'SKY'.
    """
    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        # np.max() of an empty selection would raise an opaque ValueError
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame.fibermap")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    # masked pixels get a null weight
    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    input_ivar = None
    if model_ivar:
        log.info(
            "use a model of the inverse variance to remove bias due to correlated ivar and flux"
        )
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        threshold = 0.01
        for f in range(current_ivar.shape[0]):
            current_ivar[f] = median_ivar_vs_fiber[f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    # need focal plane coordinates
    x = frame.fibermap["FIBERASSIGN_X"]
    y = frame.fibermap["FIBERASSIGN_Y"]

    # normalize for numerical stability
    xm = np.mean(x)
    ym = np.mean(y)
    xs = np.std(x)
    ys = np.std(y)
    if xs == 0:
        xs = 1
    if ys == 0:
        ys = 1
    x = (x - xm) / xs
    y = (y - ym) / ys
    # wavelength rescaled linearly to [-1, 1]
    wnorm = (frame.wave - frame.wave[0]) / (frame.wave[-1] - frame.wave[0]) * 2. - 1

    # precompute the monomials for all fibers; each one is a
    # [nspec, nwave] array, ordered by (dx, dy, dw)
    log.debug("compute monomials for deg={} and {}".format(
        angular_variation_deg, chromatic_variation_deg))
    monomials = []
    for dx in range(angular_variation_deg + 1):
        for dy in range(angular_variation_deg + 1 - dx):
            xypol = (x**dx) * (y**dy)
            for dw in range(chromatic_variation_deg + 1):
                wpol = wnorm**dw
                monomials.append(np.outer(xypol, wpol))

    ncoef = len(monomials)
    coef = np.zeros((ncoef))

    allfibers_monomials = np.array(monomials)
    log.debug("shape of allfibers_monomials = {}".format(allfibers_monomials.shape))

    skyfibers_monomials = allfibers_monomials[:, skyfibers, :]
    log.debug("shape of skyfibers_monomials = {}".format(skyfibers_monomials.shape))

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    # start from a polynomial equal to 1 everywhere
    Pol = np.ones(flux.shape, dtype=float)
    coef[0] = 1.

    nout_tot = 0
    previous_chi2 = -10.
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # the model is model[fiber]=R[fiber]*Pol(x,y,wave)*sky
        # the parameters are the unconvolved sky flux at the wavelength i
        # and the polynomial coefficients

        A = np.zeros((nwave, nwave), dtype=float)
        B = np.zeros((nwave), dtype=float)
        D = scipy.sparse.lil_matrix((nwave, nwave))
        D2 = scipy.sparse.lil_matrix((nwave, nwave))

        Pol /= coef[0]  # force constant term to 1.

        # solving for the deconvolved mean sky spectrum
        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber (1st fit) %d/%d" % (iteration, fiber, nfibers))
            D.setdiag(sqrtw[fiber])
            D2.setdiag(Pol[fiber])
            sqrtwRP = D.dot(Rsky[fiber]).dot(D2)  # each row r of R is multiplied by sqrtw[r]
            A += (sqrtwRP.T * sqrtwRP).todense()
            B += sqrtwRP.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)
        # only solve for parameters that are constrained by the data
        constrained = A.diagonal() > 0
        A_pos_def = A[constrained, :]
        A_pos_def = A_pos_def[:, constrained]
        parameters = B * 0
        try:
            parameters[constrained] = cholesky_solve(A_pos_def, B[constrained])
        except Exception:
            # best-effort fallback on any solver error; unlike a bare
            # except this lets KeyboardInterrupt/SystemExit propagate
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[constrained] = np.linalg.lstsq(A_pos_def, B[constrained])[0]
        # parameters = the deconvolved mean sky spectrum

        # now evaluate the polynomial coefficients
        Ap = np.zeros((ncoef, ncoef), dtype=float)
        Bp = np.zeros((ncoef), dtype=float)
        D2.setdiag(parameters)
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber (2nd fit) %d/%d" % (iteration, fiber, nfibers))
            D.setdiag(sqrtw[fiber])
            sqrtwRSM = D.dot(Rsky[fiber]).dot(D2).dot(skyfibers_monomials[:, fiber, :].T)
            Ap += sqrtwRSM.T.dot(sqrtwRSM)
            Bp += sqrtwRSM.T.dot(sqrtwflux[fiber])

        # Add huge prior on zeroth angular order terms to converge faster
        # (because those terms are degenerate with the mean deconvolved spectrum)
        weight = 1e24
        Ap[0, 0] += weight
        Bp[0] += weight  # force 0th term to 1
        for i in range(1, chromatic_variation_deg + 1):
            Ap[i, i] += weight  # force other wavelength terms to 0

        coef = cholesky_solve(Ap, Bp)
        log.info("pol coef = {}".format(coef))

        # recompute the polynomial values
        Pol = skyfibers_monomials.T.dot(coef).T

        # chi2 and outlier rejection
        log.info("iter %d compute chi2" % iteration)
        for fiber in range(nfibers):
            chi2[fiber] = current_ivar[fiber] * (
                flux[fiber] - Rsky[fiber].dot(Pol[fiber] * parameters))**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%g ndf=%d chi2pdf=%f delta=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf,
                  abs(sum_chi2 - previous_chi2), nout_iter))

        # stop when nothing was rejected and the chi2 has converged
        if nout_iter == 0 and abs(sum_chi2 - previous_chi2) < 0.2:
            break
        previous_chi2 = sum_chi2 + 0.

    log.info("nout tot=%d" % nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # we ignore here the fact that we have fit an angular variation,
    # so the sky model uncertainties are inaccurate

    log.info("compute the parameter covariance")
    try:
        parameter_covar = cholesky_invert(A)
    except np.linalg.LinAlgError:
        log.warning(
            "cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv"
        )
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar = Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var = np.diagonal(convolved_sky_covar)

    # inverse (0 where the variance is not positive)
    convolved_sky_ivar = (convolved_sky_var > 0) / (
        convolved_sky_var + (convolved_sky_var == 0))

    # and simply consider it's the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar, frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)

    Pol = allfibers_monomials.T.dot(coef).T
    for fiber in range(frame.nspec):
        cskyflux[fiber] = frame.R[fiber].dot(Pol[fiber] * parameters)

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar, skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot,
                    stat_ivar=cskyivar)  # keep a record of the statistical ivar for QA
def compute_uniform_sky(frame, nsig_clipping=4., max_iterations=100,
                        model_ivar=False, add_variance=True):
    """Compute a sky model that is the same for all fibers.

    Sky[fiber,i] = R[fiber,i,j] Flux[j]

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]
          - R : per-fiber resolution matrices
          - fibermap : table with an OBJTYPE column
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , maximum number of fit/rejection iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated
            flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky
            model variance

    Returns:
        SkyModel object with attributes wave, flux, ivar, mask

    Raises:
        ValueError: if no fiber of the fibermap has OBJTYPE == 'SKY'.
    """
    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        # np.max() of an empty selection would raise an opaque ValueError
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame.fibermap")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    # masked pixels get a null weight
    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    input_ivar = None
    if model_ivar:
        log.info(
            "use a model of the inverse variance to remove bias due to correlated ivar and flux"
        )
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        threshold = 0.01
        for f in range(current_ivar.shape[0]):
            current_ivar[f] = median_ivar_vs_fiber[f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    nout_tot = 0
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # the model is model[fiber]=R[fiber]*sky
        # and the parameters are the unconvolved sky flux at the wavelength i

        # so, d(model)/di[fiber,w] = R[fiber][w,i]
        # this gives
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w,i] R[fiber][w,j]
        # A = sum_fiber ( diag(sqrt(ivar))*R[fiber] ) ( diag(sqrt(ivar))* R[fiber] )^t
        # A = sum_fiber sqrtwR[fiber] sqrtwR[fiber]^t
        # and
        # B = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w] * flux[fiber,w]
        # B = sum_fiber sum_wave_w sqrt(ivar)[fiber,w]*flux[fiber,w] sqrtwR[fiber,wave]

        A = np.zeros((nwave, nwave))
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber %d/%d" % (iteration, fiber, nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

            A += (sqrtwR.T * sqrtwR).todense()
            B += sqrtwR.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)
        # only solve for parameters that are constrained by the data
        w = A.diagonal() > 0
        A_pos_def = A[w, :]
        A_pos_def = A_pos_def[:, w]
        parameters = B * 0
        try:
            parameters[w] = cholesky_solve(A_pos_def, B[w])
        except Exception:
            # best-effort fallback on any solver error; unlike a bare
            # except this lets KeyboardInterrupt/SystemExit propagate
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[w] = np.linalg.lstsq(A_pos_def, B[w])[0]

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):
            # the parameters are directly the unconvolved sky flux
            # so we simply have to reconvolve it
            fiber_convolved_sky_flux = Rsky[fiber].dot(parameters)
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - fiber_convolved_sky_flux)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    log.info("compute the parameter covariance")
    # we may have to use a different method to compute this
    # covariance
    try:
        parameter_covar = cholesky_invert(A)
        # the above is too slow
        # maybe invert per block, sandwich by R
    except np.linalg.LinAlgError:
        log.warning(
            "cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv"
        )
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar = Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var = np.diagonal(convolved_sky_covar)

    # inverse (0 where the variance is not positive)
    convolved_sky_ivar = (convolved_sky_var > 0) / (
        convolved_sky_var + (convolved_sky_var == 0))

    # and simply consider it's the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar, frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(parameters)

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar, skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot,
                    stat_ivar=cskyivar)  # keep a record of the statistical ivar for QA
def compute_sky(frame, nsig_clipping=4., max_iterations=20):
    """Compute a sky model.

    The deconvolved sky spectrum is fitted on the fibers whose fibermap
    OBJTYPE is 'SKY', with iterative outlier rejection, and is then
    re-convolved with the resolution of every fiber of the frame.

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        max_iterations : [optional] maximum number of fit + rejection
            iterations (default 20, the value that used to be hard-coded)

    Returns:
        SkyModel object with attributes wave, flux, ivar, mask

    Raises:
        ValueError: if the frame has no fiber with OBJTYPE == 'SKY' or if
            max_iterations is smaller than 1.
    """
    log = get_logger()
    log.info("starting")

    if max_iterations < 1:
        # at least one pass is needed to build the normal equations
        raise ValueError("max_iterations must be >= 1, got {}".format(max_iterations))

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        # np.max() below would fail with an opaque reduction error
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame, cannot compute a sky model")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    current_ivar = frame.ivar[skyfibers].copy()
    # give zero weight to masked pixels (mask==0 means good); frames that
    # carry no mask are used as they are
    frame_mask = getattr(frame, 'mask', None)
    if frame_mask is not None:
        current_ivar = current_ivar * (frame_mask[skyfibers] == 0)

    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    nout_tot = 0
    for iteration in range(max_iterations):

        # normal equations A * skyflux = B, accumulated over sky fibers
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))
        # sparse matrix whose diagonal is set to sqrt(ivar) of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d fiber %d" % (iteration, fiber))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

            A = A + (sqrtwR.T * sqrtwR).tocsr()
            B += sqrtwR.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)

        skyflux = cholesky_solve(A.todense(), B)

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):
            S = Rsky[fiber].dot(skyflux)
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - S)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" % (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # solve once again to get deconvolved sky variance
    skyflux, skycovar = cholesky_solve_and_invert(A.todense(), B)

    # Use diagonal of skycovar convolved with mean resolution of all fibers
    # first compute average resolution
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    R = Resolution(mean_res_data)
    # compute convolved sky and ivar
    cskycovar = R.dot(skycovar).dot(R.T.todense())
    cskyvar = np.diagonal(cskycovar)
    # inverse of the variance, 0 where the variance is not positive
    cskyivar = (cskyvar > 0) / (cskyvar + (cskyvar == 0))

    # convert cskyivar to 2D; today it is the same for all spectra,
    # but that may not be the case in the future
    cskyivar = np.tile(cskyivar, frame.nspec).reshape(frame.nspec, nwave)

    # Convolved sky, with the resolution of each fiber of the frame
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(skyflux)

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, cskyivar, mask, nrej=nout_tot)
def sim_source_spectra(allinfo, allzbest, infofile='source-truth.fits', debug=False):
    """Build the residual (source) spectra.

    No redshift-fitting: for every object whose best-fit redshift matches
    the lens redshift, the best-fit redrock template is resampled, convolved
    with the resolution and subtracted from the lens+source spectrum.

    Args:
        allinfo: table with (at least) the columns TARGETID, LENS_Z and
            CHUNK, one row per simulated object.
        allzbest: table with (at least) the columns TARGETID, SPECTYPE, Z
            and COEFF, row-matched to `allinfo`.
        infofile: name of the file to which the selected rows of `allinfo`
            are written.
        debug: if True, save one QA figure per object.

    Returns:
        The rows of `allinfo` that passed the redshift selection.

    Raises:
        ValueError: if the TARGETID columns of the two tables differ, or if
            no object passes the selection.
    """
    # Validate before the (heavy) project imports so that bad input fails fast.
    if not np.array_equal(allinfo['TARGETID'], allzbest['TARGETID']):
        raise ValueError('allinfo and allzbest must list the same TARGETIDs in the same order')

    from desispec.io import read_spectra, write_spectra
    from desispec.interpolation import resample_flux
    from desispec.resolution import Resolution
    from redrock.templates import find_templates, Template

    nsim = len(allinfo)

    # Select the subset of objects for which we got the correct lens (BGS)
    # redshift.
    these = np.where((allzbest['SPECTYPE'] == 'GALAXY') *
                     (np.abs(allzbest['Z'] - allinfo['LENS_Z']) < 0.003))[0]
    print('Selecting {}/{} lenses with the correct redshift'.format(len(these), nsim))
    if len(these) == 0:
        raise ValueError('No spectra passed the cuts!')

    allinfo = allinfo[these]
    allzbest = allzbest[these]

    print('Writing {}'.format(infofile))
    allinfo.write(infofile, overwrite=True)

    tempfile = find_templates()[0]
    rrtemp = Template(tempfile)

    # loop on each chunk of lens+source spectra, in a reproducible order
    for ichunk in sorted(set(allinfo['CHUNK'])):

        I = np.where(allinfo['CHUNK'] == ichunk)[0]
        info = allinfo[I]
        zbest = allzbest[I]

        specfile = 'lenssource-spectra-chunk{:03d}.fits'.format(ichunk)
        sourcefile = 'source-spectra-chunk{:03d}.fits'.format(ichunk)
        spectra = read_spectra(specfile).select(targets=info['TARGETID'])
        for igal, zz in enumerate(zbest):
            # best-fit template shifted to the fitted redshift
            zwave = rrtemp.wave * (1 + zz['Z'])
            zflux = rrtemp.flux.T.dot(zz['COEFF']).T  #/ (1 + zz['Z'])
            if debug:
                # NOTE(review): relies on a module-level `plt`
                # (matplotlib.pyplot) - confirm it is imported by the file.
                fig, ax = plt.subplots()
            for band in spectra.bands:
                R = Resolution(spectra.resolution_data[band][igal])
                # use fastspecfit here
                modelflux = R.dot(resample_flux(spectra.wave[band], zwave, zflux))
                if debug:
                    ax.plot(spectra.wave[band], spectra.flux[band][igal, :])
                    ax.plot(spectra.wave[band], modelflux)
                    ax.set_ylim(np.median(spectra.flux['r'][igal, :]) +
                                np.std(spectra.flux['r'][igal, :]) * np.array([-1.5, 3]))
                spectra.flux[band][igal, :] -= modelflux  # subtract
            if debug:
                qafile = 'source-spectra-chunk{:03d}-{}.png'.format(ichunk, igal)
                fig.savefig(qafile)
                # close this very figure rather than whichever is current
                plt.close(fig)

        print('Writing {} spectra to {}'.format(len(zbest), sourcefile))
        write_spectra(outfile=sourcefile, spec=spectra)

    return allinfo
color='k') ax.fill_between(this_rz_band, [0, 0], [1, 1], alpha=0.1, color='k') ax.legend(fontsize=6, loc='lower right') spectype = allzbest['SPECTYPE'][j].strip() subtype = allzbest['SUBTYPE'][j].strip() fulltype = (spectype, subtype) ncoeff = templates[fulltype].flux.shape[0] coeff = allzbest['COEFF'][j][0:ncoeff] tflux = templates[fulltype].flux.T.dot(coeff) twave = templates[fulltype].wave * (1 + allzbest[j]['Z']) txflux = R.dot( resample_flux(cand_spectra.wave['brz'], twave, tflux)) res = allflux[j] - txflux res_smooth = gaussian_filter1d(res, 25) ax.plot(allwave / (1. + (allzbest[j]['Z'])), res_smooth, 'k-', alpha=0.3) fig.tight_layout() outplot = '{}/transient_candidates_{}_{}.png'.format( plot_path, obsdate, tile_number) fig.savefig(outplot, dpi=200) print('Figure saved in {}', outplot) plt.clf()
def rrtemp_to_spectra(infile, nrshifts=None, tempfluxes=None, tempwaves=None, zbest=None):
    '''
    Write one residual (data - redshift template) spectra file per spectrum.

    Each output file holds a single-spectrum Spectra object and is written
    next to infile, so that it can be run through redrock.

    infile: must be a quickspectra infile with keys IVAR, MASK, RESOLUTION
        (read per band from the B_*, R_* and Z_* HDUs) and FIBERMAP
    nrshifts: number of spectra to process; defaults to the number of
        templates
    tempfluxes: must be a list of redshift template fluxes
    tempwaves: must be a list of redshift template waves
    zbest: if tempfluxes and tempwaves are not provided the zbest data must be
        so the templates can be found

    returns the list of values returned by desispec.io.write_spectra,
    one per written file

    raises ValueError if only one of tempfluxes/tempwaves is given, or if
    neither the templates nor zbest are given
    '''
    # Validate the arguments first: nothing is read if they are inconsistent.
    if (tempfluxes is None) != (tempwaves is None):
        raise ValueError('tempfluxes and tempwaves must be provided together')
    build_templates = tempfluxes is None
    if build_templates and zbest is None:
        raise ValueError('zbest is required when tempfluxes and tempwaves are not provided')

    fileloc = os.path.dirname(infile)
    # file name without directory and extension; also correct when infile
    # has no directory part
    filestem = os.path.splitext(os.path.basename(infile))[0]

    spectra = desispec.io.read_spectra(infile)

    if build_templates:
        tempfile = redrock.templates.find_templates()[0]
        rrtemp = redrock.templates.Template(tempfile, wave=spectra.wave)
        ncoeff = rrtemp.flux.shape[0]
        tempfluxes = []
        tempwaves = []
        for ii in range(len(zbest)):
            coeff = zbest['COEFF'][ii][:ncoeff]
            tempfluxes.append(rrtemp.flux.T.dot(coeff))
            tempwaves.append(rrtemp.wave * (1 + zbest[ii]['Z']))

    if nrshifts is None:
        nrshifts = len(tempfluxes)

    spectra_fibermap = Table.read(infile, 'FIBERMAP')

    ivarb = fits.getdata(infile, 'B_IVAR')
    ivarr = fits.getdata(infile, 'R_IVAR')
    ivarz = fits.getdata(infile, 'Z_IVAR')

    maskb = fits.getdata(infile, 'B_MASK')
    maskr = fits.getdata(infile, 'R_MASK')
    maskz = fits.getdata(infile, 'Z_MASK')

    resb = fits.getdata(infile, 'B_RESOLUTION')
    resr = fits.getdata(infile, 'R_RESOLUTION')
    resz = fits.getdata(infile, 'Z_RESOLUTION')

    # one single-row fibermap table per spectrum
    spectra_fibermaps = [Table(row) for row in spectra_fibermap]

    specfiles = []
    for ii in range(nrshifts):
        ivar = {'b': np.array([ivarb[ii]]), 'r': np.array([ivarr[ii]]), 'z': np.array([ivarz[ii]])}
        mask = {'b': np.array([maskb[ii]]), 'r': np.array([maskr[ii]]), 'z': np.array([maskz[ii]])}
        res = {'b': np.array([resb[ii]]), 'r': np.array([resr[ii]]), 'z': np.array([resz[ii]])}
        netflux = {'b': None, 'r': None, 'z': None}
        for band in spectra.bands:
            # convolve the template with the resolution of *this* spectrum
            R = Resolution(spectra.resolution_data[band][ii])
            txflux = R.dot(resample_flux(spectra.wave[band], tempwaves[ii], tempfluxes[ii]))
            netflux[band] = np.array([spectra.flux[band][ii] - txflux])
        spec = Spectra(spectra.bands, spectra.wave, netflux, ivar, resolution_data=res, mask=mask,
                       fibermap=spectra_fibermaps[ii], meta=None, single=True)
        residualoutfile = os.path.join(fileloc, 'residualdata-spectra-class-{}-{}'.format(filestem, ii))
        specfile = desispec.io.write_spectra(outfile=residualoutfile, spec=spec)
        specfiles.append(specfile)
    return specfiles
def compute_non_uniform_sky(frame, nsig_clipping=4., max_iterations=10, model_ivar=False, add_variance=True, angular_variation_deg=1):
    """Compute a sky model.

    Sky[fiber,i] = R[fiber,i,j] ( Flux_0[j] + x[fiber]*Flux_x[j] + y[fiber]*Flux_y[j] + ... )

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]
          - fibermap : table with columns OBJTYPE, DESIGN_X and DESIGN_Y
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        angular_variation_deg : degree of 2D polynomial correction as a function of fiber focal plane coordinates (default=1). One set of coefficients per wavelength

    Returns:
        SkyModel object with attributes wave, flux, ivar, mask

    Raises:
        ValueError: if the frame has no fiber with OBJTYPE == 'SKY' or if
            max_iterations is smaller than 1.
    """
    log = get_logger()
    log.info("starting")

    if max_iterations < 1:
        # at least one pass is needed to build the normal equations
        raise ValueError("max_iterations must be >= 1, got {}".format(max_iterations))

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        # np.max() below would fail with an opaque reduction error
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame, cannot compute a sky model")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    # masked pixels get a null weight
    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    # need focal plane coordinates of fibers
    x = frame.fibermap["DESIGN_X"][skyfibers]
    y = frame.fibermap["DESIGN_Y"][skyfibers]
    # normalize for numerical stability, using all fibers of the frame
    xm = np.mean(frame.fibermap["DESIGN_X"])
    ym = np.mean(frame.fibermap["DESIGN_Y"])
    xs = np.std(frame.fibermap["DESIGN_X"])
    ys = np.std(frame.fibermap["DESIGN_Y"])
    if xs == 0:
        xs = 1
    if ys == 0:
        ys = 1
    x = (x - xm) / xs
    y = (y - ym) / ys

    # precompute the monomials x**dx * y**dy (dx+dy <= degree) for the sky fibers
    log.debug("compute monomials for deg={}".format(angular_variation_deg))
    monomials = []
    for dx in range(angular_variation_deg + 1):
        for dy in range(angular_variation_deg + 1 - dx):
            monomials.append((x**dx) * (y**dy))
    ncoef = len(monomials)
    monomials = np.array(monomials)

    if model_ivar:
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        threshold = 0.01
        for f in range(current_ivar.shape[0]):
            # separable model: per-fiber scale times per-wavelength median
            current_ivar[f] = median_ivar_vs_fiber[f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    nout_tot = 0
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # with x_fiber,y_fiber the fiber coordinates in the focal plane (or sky)
        # the unconvolved sky flux at wavelength i is a polynomial of x_fiber,y_fiber
        # sky(fiber,i) = pol(x_fiber,y_fiber,p) = sum_p a_ip * x_fiber**degx(p) y_fiber**degy(p)
        # sky(fiber,i) = sum_p monom[fiber,p] * a_ip
        # the convolved sky flux at wavelength w is
        # model[fiber,w] = sum_i R[fiber][w,i] sum_p monom[fiber,p] * a_ip
        # model[fiber,w] = sum_p monom[fiber,p] R[fiber][w,i] a_ip
        #
        # so, the matrix A is composed of blocks (p,k) corresponding to polynomial coefficient indices where
        # A[pk] = sum_fiber monom[fiber,p]*monom[fiber,k] sqrtwR[fiber] sqrtwR[fiber]^t
        # similarly
        # B[p]  = sum_fiber monom[fiber,p] * sum_wave_w (sqrt(ivar)[fiber,w]*flux[fiber,w]) sqrtwR[fiber,wave]

        A = np.zeros((nwave * ncoef, nwave * ncoef))
        B = np.zeros((nwave * ncoef))

        # sparse matrix whose diagonal is set to sqrt(ivar) of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber %d/%d" % (iteration, fiber, nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

            wRtR = (sqrtwR.T * sqrtwR).todense()
            wRtF = sqrtwR.T * sqrtwflux[fiber]

            # loop on polynomial coefficients (double loop for A)
            # fill only blocks of A and B
            for p in range(ncoef):
                for k in range(ncoef):
                    A[p * nwave:(p + 1) * nwave, k * nwave:(k + 1) * nwave] += monomials[p, fiber] * monomials[k, fiber] * wRtR
                B[p * nwave:(p + 1) * nwave] += monomials[p, fiber] * wRtF

        log.info("iter %d solving" % iteration)
        # solve only for the parameters that are constrained (positive diagonal)
        w = A.diagonal() > 0
        A_pos_def = A[w, :]
        A_pos_def = A_pos_def[:, w]
        parameters = B * 0
        try:
            parameters[w] = cholesky_solve(A_pos_def, B[w])
        except Exception:
            # best-effort fallback; `Exception` (rather than a bare except)
            # lets KeyboardInterrupt and SystemExit propagate
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[w] = np.linalg.lstsq(A_pos_def, B[w])[0]

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):
            # loop on polynomial indices
            unconvolved_fiber_sky_flux = np.zeros(nwave)
            for p in range(ncoef):
                unconvolved_fiber_sky_flux += monomials[p, fiber] * parameters[p * nwave:(p + 1) * nwave]
            # then convolve
            fiber_convolved_sky_flux = Rsky[fiber].dot(unconvolved_fiber_sky_flux)

            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - fiber_convolved_sky_flux)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" % (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # is there a different method to compute this ?
    log.info("compute covariance")
    try:
        parameter_covar = cholesky_invert(A)
    except np.linalg.LinAlgError:
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    cskyflux = np.zeros(frame.flux.shape)
    cskyivar = np.zeros(frame.flux.shape)

    log.info("compute convolved parameter covariance")
    # The covariance of the parameters is composed of ncoef*ncoef blocks each of size nwave*nwave
    # A block (p,k) is the covariance of the unconvolved spectra p and k , corresponding to the polynomial indices p and k
    # We first sandwich each block with the average resolution and keep its diagonal.
    convolved_parameter_covar = np.zeros((ncoef, ncoef, nwave))
    for p in range(ncoef):
        for k in range(ncoef):
            convolved_parameter_covar[p, k] = np.diagonal(Rmean.dot(parameter_covar[p * nwave:(p + 1) * nwave, k * nwave:(k + 1) * nwave]).dot(Rmean.T.todense()))

    # Now we compute the sky model variance for each fiber individually
    # accounting for its focal plane coordinates
    # so that a target fiber distant from a sky fiber will naturally have a larger
    # sky model variance
    log.info("compute sky and variance per fiber")
    for i in range(frame.nspec):
        # compute monomials, in the same order as for the sky fibers
        M = []
        xi = (frame.fibermap["DESIGN_X"][i] - xm) / xs
        yi = (frame.fibermap["DESIGN_Y"][i] - ym) / ys
        for dx in range(angular_variation_deg + 1):
            for dy in range(angular_variation_deg + 1 - dx):
                M.append((xi**dx) * (yi**dy))
        M = np.array(M)

        unconvolved_fiber_sky_flux = np.zeros(nwave)
        convolved_fiber_skyvar = np.zeros(nwave)
        for p in range(ncoef):
            unconvolved_fiber_sky_flux += M[p] * parameters[p * nwave:(p + 1) * nwave]
            for k in range(ncoef):
                convolved_fiber_skyvar += M[p] * M[k] * convolved_parameter_covar[p, k]

        # convolve sky model with this fiber's resolution
        cskyflux[i] = frame.R[i].dot(unconvolved_fiber_sky_flux)

        # save inverse of variance, 0 where the variance is not positive
        cskyivar[i] = (convolved_fiber_skyvar > 0) / (convolved_fiber_skyvar + (convolved_fiber_skyvar == 0))

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar, skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar=cskyivar)  # keep a record of the statistical ivar for QA
def compute_polynomial_times_sky(frame, nsig_clipping=4., max_iterations=30, model_ivar=False, add_variance=True, angular_variation_deg=1, chromatic_variation_deg=1):
    """Compute a sky model.

    Sky[fiber,i] = R[fiber,i,j] Polynomial(x[fiber],y[fiber],wavelength[j]) Flux[j]

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]
          - fibermap : table with columns OBJTYPE, DESIGN_X and DESIGN_Y
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        angular_variation_deg : maximum total degree dx+dy of the monomials
            x**dx * y**dy of the normalized fiber coordinates
        chromatic_variation_deg : maximum degree of the monomials of the
            wavelength (rescaled to [-1,1]) multiplying each x,y monomial

    Returns:
        SkyModel object with attributes wave, flux, ivar, mask

    Raises:
        ValueError: if the frame has no fiber with OBJTYPE == 'SKY' or if
            max_iterations is smaller than 1.
    """
    log = get_logger()
    log.info("starting")

    if max_iterations < 1:
        # at least one pass is needed to define the sky and the polynomial
        raise ValueError("max_iterations must be >= 1, got {}".format(max_iterations))

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        # np.max() below would fail with an opaque reduction error
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame, cannot compute a sky model")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    # masked pixels get a null weight
    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    if model_ivar:
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        threshold = 0.01
        for f in range(current_ivar.shape[0]):
            # separable model: per-fiber scale times per-wavelength median
            current_ivar[f] = median_ivar_vs_fiber[f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    # need focal plane coordinates (of all fibers of the frame)
    x = frame.fibermap["DESIGN_X"]
    y = frame.fibermap["DESIGN_Y"]

    # normalize for numerical stability
    xm = np.mean(x)
    ym = np.mean(y)
    xs = np.std(x)
    ys = np.std(y)
    if xs == 0:
        xs = 1
    if ys == 0:
        ys = 1
    x = (x - xm) / xs
    y = (y - ym) / ys
    # wavelength rescaled to [-1,1]
    wnorm = (frame.wave - frame.wave[0]) / (frame.wave[-1] - frame.wave[0]) * 2. - 1

    # precompute the monomials; the chromatic index runs fastest, so the first
    # chromatic_variation_deg+1 entries are the terms of zeroth angular order
    log.debug("compute monomials for deg={} and {}".format(angular_variation_deg, chromatic_variation_deg))
    monomials = []
    for dx in range(angular_variation_deg + 1):
        for dy in range(angular_variation_deg + 1 - dx):
            xypol = (x**dx) * (y**dy)
            for dw in range(chromatic_variation_deg + 1):
                wpol = wnorm**dw
                monomials.append(np.outer(xypol, wpol))

    ncoef = len(monomials)
    coef = np.zeros((ncoef))

    allfibers_monomials = np.array(monomials)
    log.debug("shape of allfibers_monomials = {}".format(allfibers_monomials.shape))

    skyfibers_monomials = allfibers_monomials[:, skyfibers, :]
    log.debug("shape of skyfibers_monomials = {}".format(skyfibers_monomials.shape))

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    # start from a polynomial equal to 1 everywhere
    Pol = np.ones(flux.shape, dtype=float)
    coef[0] = 1.

    nout_tot = 0
    previous_chi2 = -10.
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # the model is model[fiber]=R[fiber]*Pol(x,y,wave)*sky
        # the parameters are the unconvolved sky flux at the wavelength i
        # and the polynomial coefficients

        A = np.zeros((nwave, nwave), dtype=float)
        B = np.zeros((nwave), dtype=float)
        D = scipy.sparse.lil_matrix((nwave, nwave))
        D2 = scipy.sparse.lil_matrix((nwave, nwave))

        Pol /= coef[0]  # force constant term to 1.

        # solving for the deconvolved mean sky spectrum
        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber (1st fit) %d/%d" % (iteration, fiber, nfibers))
            D.setdiag(sqrtw[fiber])
            D2.setdiag(Pol[fiber])
            sqrtwRP = D.dot(Rsky[fiber]).dot(D2)  # each row r of R is multiplied by sqrtw[r]
            A += (sqrtwRP.T * sqrtwRP).todense()
            B += sqrtwRP.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)
        # solve only for the parameters that are constrained (positive diagonal)
        constrained = A.diagonal() > 0
        A_pos_def = A[constrained, :]
        A_pos_def = A_pos_def[:, constrained]
        parameters = B * 0
        try:
            parameters[constrained] = cholesky_solve(A_pos_def, B[constrained])
        except Exception:
            # best-effort fallback; `Exception` (rather than a bare except)
            # lets KeyboardInterrupt and SystemExit propagate
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[constrained] = np.linalg.lstsq(A_pos_def, B[constrained])[0]
        # parameters = the deconvolved mean sky spectrum

        # now evaluate the polynomial coefficients
        Ap = np.zeros((ncoef, ncoef), dtype=float)
        Bp = np.zeros((ncoef), dtype=float)
        D2.setdiag(parameters)
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber (2nd fit) %d/%d" % (iteration, fiber, nfibers))
            D.setdiag(sqrtw[fiber])
            sqrtwRSM = D.dot(Rsky[fiber]).dot(D2).dot(skyfibers_monomials[:, fiber, :].T)
            Ap += sqrtwRSM.T.dot(sqrtwRSM)
            Bp += sqrtwRSM.T.dot(sqrtwflux[fiber])

        # Add huge prior on zeroth angular order terms to converge faster
        # (because those terms are degenerate with the mean deconvolved spectrum)
        weight = 1e24
        Ap[0, 0] += weight
        Bp[0] += weight  # force 0th term to 1
        for i in range(1, chromatic_variation_deg + 1):
            Ap[i, i] += weight  # force other wavelength terms to 0

        coef = cholesky_solve(Ap, Bp)
        log.info("pol coef = {}".format(coef))

        # recompute the polynomial values
        Pol = skyfibers_monomials.T.dot(coef).T

        # chi2 and outlier rejection
        log.info("iter %d compute chi2" % iteration)
        for fiber in range(nfibers):
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - Rsky[fiber].dot(Pol[fiber] * parameters))**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%g ndf=%d chi2pdf=%f delta=%f nout=%d" % (iteration, sum_chi2, ndf, chi2pdf, abs(sum_chi2 - previous_chi2), nout_iter))

        # stop when nothing was rejected and the chi2 is stable
        if nout_iter == 0 and abs(sum_chi2 - previous_chi2) < 0.2:
            break
        previous_chi2 = sum_chi2 + 0.

    log.info("nout tot=%d" % nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # we ignore here the fact that we have fit an angular variation,
    # so the sky model uncertainties are inaccurate

    log.info("compute the parameter covariance")
    try:
        parameter_covar = cholesky_invert(A)
    except np.linalg.LinAlgError:
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar = Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var = np.diagonal(convolved_sky_covar)

    # inverse, 0 where the variance is not positive
    convolved_sky_ivar = (convolved_sky_var > 0) / (convolved_sky_var + (convolved_sky_var == 0))

    # and simply consider it's the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar, frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (polynomial times sky, convolved with the resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)

    Pol = allfibers_monomials.T.dot(coef).T
    for fiber in range(frame.nspec):
        cskyflux[fiber] = frame.R[fiber].dot(Pol[fiber] * parameters)

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar, skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar=cskyivar)  # keep a record of the statistical ivar for QA
def compute_uniform_sky(frame, nsig_clipping=4.,max_iterations=100,model_ivar=False,add_variance=True) :
    """Compute a sky model that is uniform across the frame.

    Sky[fiber,i] = R[fiber,i,j] Flux[j]

    The deconvolved sky spectrum is fit to the sky fibers of the frame
    (rows where fibermap['OBJTYPE'] == 'SKY') with iterative outlier
    rejection, then re-convolved with the resolution of every fiber.

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]
          - R : per-fiber resolution objects, indexed like flux
          - fibermap : table with an 'OBJTYPE' column
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int, maximum number of fit/reject iterations (>= 1)
        model_ivar : replace ivar by a model to avoid bias due to correlated
            flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky
            model variance (only done when there is more than one sky fiber)

    Returns:
        SkyModel object built from (wave, flux, ivar, mask), with nrej set to
        the total number of rejected entries and stat_ivar set to the purely
        statistical inverse variance.

    Raises:
        ValueError : if max_iterations < 1 or the frame has no sky fiber
        AssertionError : if a sky fiber index is >= 500
    """
    # Without at least one iteration neither A nor the parameters exist.
    if max_iterations < 1 :
        raise ValueError("max_iterations must be >= 1")

    log=get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0 :
        # np.max() on an empty selection would fail with an opaque
        # reduction error (also a ValueError); say what is actually wrong.
        raise ValueError("no sky fibers (OBJTYPE == 'SKY') found in frame fibermap")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave=frame.nwave
    nfibers=len(skyfibers)

    # masked pixels get zero weight
    current_ivar=frame.ivar[skyfibers].copy()*(frame.mask[skyfibers]==0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    if model_ivar :
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        input_ivar=current_ivar.copy()
        median_ivar_vs_wave  = np.median(current_ivar,axis=0)
        median_ivar_vs_fiber = np.median(current_ivar,axis=1)
        median_median_ivar   = np.median(median_ivar_vs_fiber)
        threshold=0.01
        for f in range(current_ivar.shape[0]) :
            # separable model: per-fiber scale times per-wavelength median
            current_ivar[f] = median_ivar_vs_fiber[f]/median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii=(input_ivar[f]<=(threshold*median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw=np.sqrt(current_ivar)
    sqrtwflux=sqrtw*flux

    chi2=np.zeros(flux.shape)

    nout_tot=0
    for iteration in range(max_iterations) :

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # the model is model[fiber]=R[fiber]*sky
        # and the parameters are the unconvolved sky flux at the wavelength i

        # so, d(model)/di[fiber,w] = R[fiber][w,i]
        # this gives
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w,i] R[fiber][w,j]
        # A = sum_fiber ( diag(sqrt(ivar))*R[fiber] ) ( diag(sqrt(ivar))* R[fiber] )^t
        # A = sum_fiber sqrtwR[fiber] sqrtwR[fiber]^t
        # and
        # B = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w] * flux[fiber,w]
        # B = sum_fiber sum_wave_w sqrt(ivar)[fiber,w]*flux[fiber,w] sqrtwR[fiber,wave]

        A=np.zeros((nwave,nwave))
        B=np.zeros((nwave))

        # diagonal sparse matrix, refilled for each fiber below
        SD=scipy.sparse.lil_matrix((nwave,nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d sky fiber %d/%d"%(iteration,fiber,nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD*R # each row r of R is multiplied by sqrtw[r]

            A += (sqrtwR.T*sqrtwR).todense()
            B += sqrtwR.T*sqrtwflux[fiber]

        log.info("iter %d solving"%iteration)
        # only solve for wavelengths that are constrained by some data
        w = A.diagonal()>0
        A_pos_def = A[w,:]
        A_pos_def = A_pos_def[:,w]
        parameters = B*0
        try:
            parameters[w]=cholesky_solve(A_pos_def,B[w])
        except Exception:
            # Best-effort fallback for any solver failure, but no longer
            # swallowing KeyboardInterrupt / SystemExit as a bare except did.
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[w]=np.linalg.lstsq(A_pos_def,B[w])[0]

        log.info("iter %d compute chi2"%iteration)

        for fiber in range(nfibers) :
            # the parameters are directly the unconvolved sky flux
            # so we simply have to reconvolve it
            fiber_convolved_sky_flux = Rsky[fiber].dot(parameters)
            chi2[fiber]=current_ivar[fiber]*(flux[fiber]-fiber_convolved_sky_flux)**2

        log.info("rejecting")

        nout_iter=0
        if iteration<1 :
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
            selection=np.where(nout_per_wave>0)[0]
            for i in selection :
                worst_entry=np.argmax(chi2[:,i])
                current_ivar[worst_entry,i]=0
                sqrtw[worst_entry,i]=0
                sqrtwflux[worst_entry,i]=0
                nout_iter += 1
        else :
            # remove all of them at once
            bad=(chi2>nsig_clipping**2)
            current_ivar *= (bad==0)
            sqrtw *= (bad==0)
            sqrtwflux *= (bad==0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2=float(np.sum(chi2))
        ndf=int(np.sum(chi2>0)-nwave)
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

        # converged: nothing was rejected in this pass
        if nout_iter == 0 :
            break

    log.info("nout tot=%d"%nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    log.info("compute the parameter covariance")
    # we may have to use a different method to compute this
    # covariance (the inversion is slow; maybe invert per block, sandwich by R)
    try :
        parameter_covar=cholesky_invert(A)
    except np.linalg.LinAlgError :
        # np.linalg.LinAlgError is the public name of the class that was
        # previously reached through the private np.linalg.linalg module.
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data=np.mean(frame.resolution_data,axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar=Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var=np.diagonal(convolved_sky_covar)

    # inverse, mapping zero or negative variance to ivar = 0
    convolved_sky_ivar=(convolved_sky_var>0)/(convolved_sky_var+(convolved_sky_var==0))

    # and simply consider it's the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar, frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(parameters)

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance :
        modified_cskyivar = _model_variance(frame,cskyflux,cskyivar,skyfibers)
    else :
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar==0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar = cskyivar) # keep a record of the statistical ivar for QA
def compute_sky(fframe,fibermap=None,nsig_clipping=4., apply_resolution=False):
    """Compute a sky model for quicklook.

    Adding in the offline algorithm here to be able to apply resolution for
    sky compute. We will update this here as needed for quicklook.
    The original weighted sky compute still is the default.

    Args:
        fframe: fiberflat fielded frame object
        fibermap: fibermap object; only used when fframe.fibermap is None
        nsig_clipping: sigma clipping value for outlier rejection
            (only used when apply_resolution is True)
        apply_resolution: if True, uses the resolution in the frame object to
            evaluate sky allowing fiber to fiber variation of resolution.
            If False, the sky is the inverse-variance weighted average of the
            sky fibers, replicated for every spectrum.

    Returns:
        SkyModel built from (fframe.wave, sky flux, sky ivar, mask); flux and
        ivar are 2D [nspec, nwave].

    Raises:
        SystemExit: if neither fframe nor the fibermap argument provides a
            fibermap.
        ValueError: if the fibermap contains no fiber with OBJTYPE == 'SKY'.
    """
    nspec=fframe.nspec
    nwave=fframe.nwave

    #- Check with fibermap. exit if None
    #- use fibermap from frame itself if exists
    if fframe.fibermap is not None:
        fibermap=fframe.fibermap

    if fibermap is None:
        print("Must have fibermap for Sky compute")
        # NOTE(review): exits with status 0 although this is an error path;
        # kept as-is because callers may rely on it.
        sys.exit(0)

    #- get the sky
    skyfibers = np.where(fibermap['OBJTYPE'] == 'SKY')[0]
    nfibers=len(skyfibers)
    if nfibers == 0:
        # Neither branch below can produce a model without sky fibers;
        # fail with a clear message instead of an obscure downstream error.
        raise ValueError("no sky fibers (OBJTYPE == 'SKY') found in fibermap")
    skyfluxes=fframe.flux[skyfibers]
    skyivars=fframe.ivar[skyfibers]

    if apply_resolution:
        max_iterations=100
        current_ivar=skyivars.copy()
        Rsky = fframe.R[skyfibers]
        sqrtw=np.sqrt(skyivars)
        sqrtwflux=sqrtw*skyfluxes

        chi2=np.zeros(skyfluxes.shape)

        nout_tot=0
        for iteration in range(max_iterations) :

            # normal equations A*skyflux = B, rebuilt with the current weights
            A=scipy.sparse.lil_matrix((nwave,nwave)).tocsr()
            B=np.zeros((nwave))
            # diagonal sparse matrix, refilled for each fiber below
            SD=scipy.sparse.lil_matrix((nwave,nwave))
            # loop on fiber to handle resolution
            for fiber in range(nfibers) :
                if fiber%10==0 :
                    print("iter %d fiber %d"%(iteration,fiber))
                R = Rsky[fiber]

                # diagonal sparse matrix with content = sqrt(ivar)
                SD.setdiag(sqrtw[fiber])

                sqrtwR = SD*R # each row r of R is multiplied by sqrtw[r]

                A = A+(sqrtwR.T*sqrtwR).tocsr()
                B += sqrtwR.T*sqrtwflux[fiber]

            print("iter %d solving"%iteration)

            # only solve for wavelengths that are constrained by some data
            w = A.diagonal()>0
            A_pos_def = A.todense()[w,:]
            A_pos_def = A_pos_def[:,w]
            skyflux = B*0
            try:
                skyflux[w]=cholesky_solve(A_pos_def,B[w])
            except Exception:
                # Best-effort fallback for any solver failure, but no longer
                # swallowing KeyboardInterrupt / SystemExit as a bare except did.
                print("cholesky failed, trying svd in iteration {}".format(iteration))
                skyflux[w]=np.linalg.lstsq(A_pos_def,B[w])[0]

            print("iter %d compute chi2"%iteration)

            for fiber in range(nfibers) :
                S = Rsky[fiber].dot(skyflux)
                chi2[fiber]=current_ivar[fiber]*(skyfluxes[fiber]-S)**2

            print("rejecting")

            nout_iter=0
            if iteration<1 :
                # only remove worst outlier per wave
                # apply rejection iteratively, only one entry per wave among fibers
                # find waves with outlier (fastest way)
                nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
                selection=np.where(nout_per_wave>0)[0]
                for i in selection :
                    worst_entry=np.argmax(chi2[:,i])
                    current_ivar[worst_entry,i]=0
                    sqrtw[worst_entry,i]=0
                    sqrtwflux[worst_entry,i]=0
                    nout_iter += 1
            else :
                # remove all of them at once
                bad=(chi2>nsig_clipping**2)
                current_ivar *= (bad==0)
                sqrtw *= (bad==0)
                sqrtwflux *= (bad==0)
                nout_iter += np.sum(bad)

            nout_tot += nout_iter

            sum_chi2=float(np.sum(chi2))
            ndf=int(np.sum(chi2>0)-nwave)
            chi2pdf=0.
            if ndf>0 :
                chi2pdf=sum_chi2/ndf
            print("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

            # converged: nothing was rejected in this pass
            if nout_iter == 0 :
                break

        print("nout tot=%d"%nout_tot)

        # solve once again to get deconvolved sky variance
        A_dense = A.todense()
        skyflux = np.linalg.lstsq(A_dense,B)[0]
        skycovar = np.linalg.pinv(A_dense)

        # Use diagonal of skycovar convolved with mean resolution of all fibers
        # first compute average resolution
        mean_res_data=np.mean(fframe.resolution_data,axis=0)
        R = Resolution(mean_res_data)
        # compute convolved sky and ivar
        cskycovar=R.dot(skycovar).dot(R.T.todense())
        cskyvar=np.diagonal(cskycovar)
        # inverse, mapping zero or negative variance to ivar = 0
        cskyivar=(cskyvar>0)/(cskyvar+(cskyvar==0))

        # convert cskyivar to 2D; today it is the same for all spectra,
        # but that may not be the case in the future
        finalskyivar = np.tile(cskyivar, nspec).reshape(nspec, nwave)

        # Convolved sky
        finalskyflux = np.zeros(fframe.flux.shape)
        for i in range(nspec):
            finalskyflux[i] = fframe.R[i].dot(skyflux)

        # need to do better here
        mask = (finalskyivar==0).astype(np.uint32)

    else: #- compute weighted average sky ignoring the fiber/wavelength resolution
        if skyfibers.shape[0] > 1:

            weights=skyivars

            #- now get weighted meansky and ivar
            meanskyflux=np.average(skyfluxes,axis=0,weights=weights)
            wtot=weights.sum(axis=0)
            werr2=(weights**2*(skyfluxes-meanskyflux)**2).sum(axis=0)
            werr=np.sqrt(werr2)/wtot
            meanskyivar=1./werr**2
        else:
            #- single sky fiber: use it directly
            meanskyflux=skyfluxes
            meanskyivar=skyivars  # was the undefined name `skyivar` (NameError)

        #- Create a 2d- sky model replicating this
        finalskyflux=np.tile(meanskyflux,nspec).reshape(nspec,nwave)
        finalskyivar=np.tile(meanskyivar,nspec).reshape(nspec,nwave)
        mask=fframe.mask

    skymodel=SkyModel(fframe.wave,finalskyflux,finalskyivar,mask)
    return skymodel
def compute_sky(frame, nsig_clipping=4., max_iterations=20) :
    """Compute a sky model.

    The deconvolved sky spectrum is fit to the sky fibers of the frame
    (rows where fibermap['OBJTYPE'] == 'SKY') with iterative outlier
    rejection, then re-convolved with the resolution of every fiber.

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - resolution_data : 3D[nspec, ndiag, nwave]
          - R : per-fiber resolution objects, indexed like flux
          - fibermap : table with an 'OBJTYPE' column
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        max_iterations : [optional] maximum number of fit/reject iterations
            (>= 1); the default of 20 is the previously hard-coded value

    Returns:
        SkyModel object built from (wave, flux, ivar, mask) with nrej set to
        the total number of rejected entries.

    Raises:
        ValueError : if max_iterations < 1 or the frame has no sky fiber
        AssertionError : if a sky fiber index is >= 500
    """
    # Without at least one iteration the normal equations are never built.
    if max_iterations < 1 :
        raise ValueError("max_iterations must be >= 1")

    log=get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0 :
        # np.max() on an empty selection would fail with an opaque
        # reduction error (also a ValueError); say what is actually wrong.
        raise ValueError("no sky fibers (OBJTYPE == 'SKY') found in frame fibermap")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave=frame.nwave
    nfibers=len(skyfibers)

    current_ivar=frame.ivar[skyfibers].copy()
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    sqrtw=np.sqrt(current_ivar)
    sqrtwflux=sqrtw*flux

    chi2=np.zeros(flux.shape)

    nout_tot=0
    for iteration in range(max_iterations) :

        # normal equations A*skyflux = B, rebuilt with the current weights
        A=scipy.sparse.lil_matrix((nwave,nwave)).tocsr()
        B=np.zeros((nwave))
        # diagonal sparse matrix, refilled for each fiber below
        SD=scipy.sparse.lil_matrix((nwave,nwave))
        # loop on fiber to handle resolution
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d fiber %d"%(iteration,fiber))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD*R # each row r of R is multiplied by sqrtw[r]

            A = A+(sqrtwR.T*sqrtwR).tocsr()
            B += sqrtwR.T*sqrtwflux[fiber]

        log.info("iter %d solving"%iteration)

        skyflux=cholesky_solve(A.todense(),B)

        log.info("iter %d compute chi2"%iteration)

        for fiber in range(nfibers) :
            S = Rsky[fiber].dot(skyflux)
            chi2[fiber]=current_ivar[fiber]*(flux[fiber]-S)**2

        log.info("rejecting")

        nout_iter=0
        if iteration<1 :
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
            selection=np.where(nout_per_wave>0)[0]
            for i in selection :
                worst_entry=np.argmax(chi2[:,i])
                current_ivar[worst_entry,i]=0
                sqrtw[worst_entry,i]=0
                sqrtwflux[worst_entry,i]=0
                nout_iter += 1
        else :
            # remove all of them at once
            bad=(chi2>nsig_clipping**2)
            current_ivar *= (bad==0)
            sqrtw *= (bad==0)
            sqrtwflux *= (bad==0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2=float(np.sum(chi2))
        ndf=int(np.sum(chi2>0)-nwave)
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

        # converged: nothing was rejected in this pass
        if nout_iter == 0 :
            break

    log.info("nout tot=%d"%nout_tot)

    # solve once again to get deconvolved sky variance
    skyflux,skycovar=cholesky_solve_and_invert(A.todense(),B)

    # Use diagonal of skycovar convolved with mean resolution of all fibers
    # first compute average resolution
    mean_res_data=np.mean(frame.resolution_data,axis=0)
    R = Resolution(mean_res_data)
    # compute convolved sky and ivar
    cskycovar=R.dot(skycovar).dot(R.T.todense())
    cskyvar=np.diagonal(cskycovar)
    # inverse, mapping zero or negative variance to ivar = 0
    cskyivar=(cskyvar>0)/(cskyvar+(cskyvar==0))

    # convert cskyivar to 2D; today it is the same for all spectra,
    # but that may not be the case in the future
    cskyivar = np.tile(cskyivar, frame.nspec).reshape(frame.nspec, nwave)

    # Convolved sky
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(skyflux)

    # need to do better here
    mask = (cskyivar==0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, cskyivar, mask, nrej=nout_tot)