def __init__(self, spectrafiles, coadd=True, targetids=None,
             first_target=None, n_target=None, comm=None):
    """Read spectra files and distribute their targets across processes.

    Rank 0 opens every file, reads the fibermap, the band names and the
    wavelength grids, and broadcasts them when a communicator is given.
    The selected targets are then split among the processes with
    ``distribute_work`` and each process fills in the flux, inverse
    variance, mask and resolution data of its own targets.

    Args:
        spectrafiles (str or list): a list of input files, or a glob
            pattern that is expanded into such a list.
        coadd (bool): if True, compute the coadds of the local targets
            once the data is loaded.  Targets are then given equal weight
            when work is distributed; otherwise they are weighted by
            their number of spectra.
        targetids (list): restrict the targets to this subset.  When None,
            the TARGETID column of each fibermap is used.
        first_target (int): first index of the selected targets to keep in
            each file.  Defaults to 0.
        n_target (int): number of selected targets to keep in each file.
            Defaults to everything from first_target onwards.
        comm: optional communicator.  Its ``size``, ``rank`` and
            ``bcast`` are the only members used here.

    Raises:
        RuntimeError: if the first_target / n_target range does not fit
            inside the selected targets of a file.

    """
    comm_size = 1
    comm_rank = 0
    if comm is not None:
        comm_size = comm.size
        comm_rank = comm.rank

    # check the file list
    if isinstance(spectrafiles, basestring):
        import glob
        spectrafiles = glob.glob(spectrafiles)

    assert len(spectrafiles) > 0

    self._spectrafiles = spectrafiles

    # This is the mapping between specs to targets for each file

    self._spec_to_target = {}
    self._target_specs = {}
    self._spec_keep = {}
    self._spec_sliced = {}

    # The bands for each file

    self._bands = {}
    self._wave = {}

    # The full list of targets from all files

    self._alltargetids = set()

    # The fibermaps from all files

    self._fmaps = {}

    for sfile in spectrafiles:
        # Rank 0 gathers everything we need from the file in a single
        # visit: the fibermap, the bands that are used and their
        # wavelength grids.  The context manager guarantees the file is
        # closed even if reading fails part way through.
        fmap = None
        bands = []
        wave = dict()
        if comm_rank == 0:
            with fits.open(sfile, memmap=True) as hdus:
                fmap = encode_table(
                    Table(hdus["FIBERMAP"].data, copy=True).as_array())
                for hdu in hdus:
                    if "EXTNAME" not in hdu.header:
                        continue
                    mat = re.match(r"(.*)_(.*)", hdu.header["EXTNAME"])
                    if mat is None:
                        continue
                    band = mat.group(1).lower()
                    if band not in bands:
                        bands.append(band)
                    if mat.group(2) == "WAVELENGTH":
                        wave[band] = hdu.data.astype(np.float64).copy()

        if comm is not None:
            fmap = comm.bcast(fmap, root=0)
            bands = comm.bcast(bands, root=0)
            wave = comm.bcast(wave, root=0)

        self._bands[sfile] = bands
        self._wave[sfile] = wave

        # Now every process has the fibermap and band information.  Build
        # the mapping between spectral rows and target IDs.

        keep_targetids = targetids
        if targetids is None:
            keep_targetids = fmap["TARGETID"]

        # Select a subset of the target range from each file if desired.
        # The range is resolved into per-file locals: writing the
        # defaults back into first_target / n_target would make the first
        # file's target count the limit for every following file.

        nselect = len(keep_targetids)

        file_first = 0 if first_target is None else first_target
        if file_first > nselect:
            raise RuntimeError("first_target value \"{}\" is beyond the "
                "number of selected targets in the file".\
                format(file_first))

        # Without an explicit count, keep everything from file_first on.
        file_ntarget = n_target
        if file_ntarget is None:
            file_ntarget = nselect - file_first
        if file_first + file_ntarget > nselect:
            raise RuntimeError(
                "Requested first_target / n_target "
                " range is larger than the number of selected targets "
                " in the file")

        keep_targetids = keep_targetids[file_first:file_first
            + file_ntarget]

        self._alltargetids.update(keep_targetids)

        # This is the spectral row to target mapping using the original
        # global indices (before slicing).  A set keeps the membership
        # test cheap for large files.

        keep_set = set(keep_targetids)
        self._spec_to_target[sfile] = [ row if tid in keep_set else -1 \
            for row, tid in enumerate(fmap["TARGETID"]) ]

        # The reduced set of spectral rows.

        self._spec_keep[sfile] = [ row for row in \
            self._spec_to_target[sfile] if row >= 0 ]

        # The mapping between original spectral indices and the sliced
        # ones

        self._spec_sliced[sfile] = { orig : sliced for sliced, orig in \
            enumerate(self._spec_keep[sfile]) }

        # Slice the fibermap

        self._fmaps[sfile] = fmap[self._spec_keep[sfile]]

        # For each target, store the sliced row index of all spectra,
        # so that we can do a fast lookup later.  One pass over the
        # sliced fibermap groups the rows; every row there belongs to a
        # kept target, so the key always exists.

        target_rows = { tid : [] for tid in keep_targetids }
        for row, tid in enumerate(self._fmaps[sfile]["TARGETID"]):
            target_rows[tid].append(row)
        self._target_specs[sfile] = target_rows

    self._keep_targets = list(sorted(self._alltargetids))

    # Now we have the metadata for all targets in all files.  Distribute
    # the targets among process weighted by the amount of work to do for
    # each target.  This weight is either "1" if we are going to use
    # coadds or the number of spectra if we are using all the data.

    tweights = None
    if not coadd:
        tweights = dict()
        for t in self._keep_targets:
            tweights[t] = 0
            for sfile in spectrafiles:
                if t in self._target_specs[sfile]:
                    tweights[t] += len(self._target_specs[sfile][t])

    self._proc_targets = distribute_work(comm_size,
        self._keep_targets, weights=tweights)

    self._my_targets = self._proc_targets[comm_rank]

    # Reverse mapping- target ID to index in our list

    self._my_target_indx = {y: x for x, y in enumerate(self._my_targets)}

    # Now every process has its local target IDs assigned.  Pre-create our
    # local target list with empty spectral data (except for wavelengths)

    self._my_data = list()

    for t in self._my_targets:
        speclist = list()
        tileids = set()
        exps = set()
        bname = None
        for sfile in spectrafiles:
            for b in self._bands[sfile]:
                if t in self._target_specs[sfile]:
                    for sindx in self._target_specs[sfile][t]:
                        frow = self._fmaps[sfile][sindx]
                        if bname is None:
                            bname = frow["BRICKNAME"]
                        exps.add(frow["EXPID"])
                        if "TILEID" in frow.dtype.names:
                            tileids.add(frow["TILEID"])
                        speclist.append(
                            Spectrum(self._wave[sfile][b], None, None,
                            None, None))
        # Meta dictionary for this target.  Whatever keys we put in here
        # will end up as columns in the final zbest output table.
        tmeta = dict()
        tmeta["NUMEXP"] = len(exps)
        tmeta["NUMEXP_datatype"] = "i4"
        tmeta["NUMTILE"] = len(tileids)
        tmeta["NUMTILE_datatype"] = "i4"
        tmeta["BRICKNAME"] = bname
        tmeta["BRICKNAME_datatype"] = "S8"
        self._my_data.append(Target(t, speclist, coadd=False, meta=tmeta))

    # Iterate over the data and broadcast.  Every process selects the rows
    # of each table that contain pieces of local target data and copies it
    # into place.

    def _bcast_rows(hdus, extname, rows, optional=False):
        # Rank 0 reads the kept rows of one HDU and every process gets a
        # copy.  An optional HDU that is missing yields None everywhere.
        hdata = None
        if comm_rank == 0 and (not optional or extname in hdus):
            hdata = hdus[extname].data[rows]
        if comm is not None:
            hdata = comm.bcast(hdata, root=0)
        return hdata

    # This tracks, for each local target, the position in its spectra
    # list of the next spectrum to fill.  It advances once per band no
    # matter which HDUs exist, so a file without a MASK HDU can not shift
    # the masks of later bands or files onto the wrong spectra.

    spec_offset = {t: 0 for t in self._my_targets}

    for sfile in spectrafiles:
        rows = self._spec_keep[sfile]
        if len(rows) == 0:
            continue

        # The local targets present in this file, with their spectra
        # lists and their row indices into the sliced tables.
        file_specs = self._target_specs[sfile]
        local = [ (t, self._my_data[indx].spectra, file_specs[t]) \
            for indx, t in enumerate(self._my_targets) \
            if t in file_specs ]

        hdus = None
        if comm_rank == 0:
            hdus = fits.open(sfile, memmap=True)

        try:
            for b in self._bands[sfile]:
                prefix = b.upper()

                hdata = _bcast_rows(hdus,
                    "{}_{}".format(prefix, "FLUX"), rows)
                for t, spectra, trows in local:
                    for slot, trow in enumerate(trows, spec_offset[t]):
                        spectra[slot].flux = \
                            hdata[trow].astype(np.float64).copy()

                hdata = _bcast_rows(hdus,
                    "{}_{}".format(prefix, "IVAR"), rows)
                for t, spectra, trows in local:
                    for slot, trow in enumerate(trows, spec_offset[t]):
                        spectra[slot].ivar = \
                            hdata[trow].astype(np.float64).copy()

                # The mask is optional.  Masked pixels get zero inverse
                # variance, so this must come after the IVAR pass.
                hdata = _bcast_rows(hdus,
                    "{}_{}".format(prefix, "MASK"), rows, optional=True)
                if hdata is not None:
                    for t, spectra, trows in local:
                        for slot, trow in enumerate(trows,
                                spec_offset[t]):
                            spectra[slot].ivar *= (hdata[trow] == 0)

                hdata = _bcast_rows(hdus,
                    "{}_{}".format(prefix, "RESOLUTION"), rows)
                for t, spectra, trows in local:
                    for slot, trow in enumerate(trows, spec_offset[t]):
                        dia = Resolution(hdata[trow].astype(np.float64))
                        spectra[slot].R = dia
                        spectra[slot].Rcsr = dia.tocsr()

                del hdata

                # This band is complete- move on to the next free slots.
                for t, _, trows in local:
                    spec_offset[t] += len(trows)
        finally:
            if hdus is not None:
                hdus.close()

    # Compute the coadds now if we are going to use those

    if coadd:
        for t in self._my_data:
            t.compute_coadd()

    self.fibermap = Table(np.hstack([ self._fmaps[x] \
        for x in self._spectrafiles ]))

    super(DistTargetsDESI, self).__init__(self._keep_targets, comm=comm)
def read_spectra(spplate_name, targetids=None, use_frames=False,
                 fiberid=None, coadd=False):
    """Read targets from an spPlate file or from its spCFrame files.

    Args:
        spplate_name (str): input spPlate file.
        targetids (list): restrict targets to this subset.  Requested
            targets without any good spectra are reported and skipped.
        use_frames (bool): if True, read the spCFrame files listed in the
            spPlate header (looked up in the same directory) instead of
            the spPlate file itself.
        fiberid (int or list): only use the fibers with these FIBERID
            values.
        coadd (bool): if True, compute and use the coadds.

    Returns:
        tuple: (targets, meta) where targets is a list of Target objects
        and meta is a Table with one row (TARGETID, BRICKNAME) per
        returned target.

    """
    ## read spplate
    # The primary header is read once; the file is closed on exit even
    # if a keyword is missing.
    with fitsio.FITS(spplate_name) as spplate:
        header = spplate[0].read_header()

    plate = header["PLATEID"]
    mjd = header["MJD"]

    if not use_frames:
        infiles = [spplate_name]
    else:
        path = os.path.dirname(spplate_name)
        cameras = ['b1','r1','b2','r2']

        infiles = []
        nexp_tot = 0
        for c in cameras:
            try:
                nexp = header["NEXP_{}".format(c.upper())]
            except (KeyError, ValueError):
                # NOTE(review): which of the two is raised for a missing
                # keyword appears to depend on the fitsio version, so
                # both are treated as "no exposures" - confirm.
                print("DEBUG: spplate {} has no exposures in camera {} ".format(spplate_name,c))
                continue
            for _ in range(nexp):
                # The EXPIDnn keywords are numbered consecutively across
                # all cameras, with at least two digits.
                nexp_tot += 1
                expid = "{:02d}".format(nexp_tot)
                # os.path.join keeps the name relative when spplate_name
                # has no directory part, instead of pointing at "/".
                exp = os.path.join(path,
                    "spCFrame-"+header["EXPID"+expid][:11]+".fits")
                infiles.append(exp)

    bricknames = {}
    dic_spectra = {}

    for infile in infiles:
        with fitsio.FITS(infile) as h:
            hdr0 = h[0].read_header()
            assert plate == hdr0["PLATEID"]
            fs = h[5]["FIBERID"][:]
            fl = h[0].read()
            # masked pixels get zero inverse variance
            iv = h[1].read()*(h[2].read()==0)
            wd = h[4].read()
            if use_frames:
                la = 10**h[3].read()

        if fiberid is not None:
            w = np.in1d(fs,fiberid)
            fs = fs[w]

        ## crop to lmin, lmax
        lmin = 3500.
        lmax = 10000.
        if use_frames:
            if hdr0["CAMERAS"][0]=="b":
                lmax = 6000.
            else:
                lmin = 5500.
        else:
            coeff0 = hdr0["COEFF0"]
            coeff1 = hdr0["COEFF1"]
            la = 10**(coeff0 + coeff1*np.arange(fl.shape[1]))
            la = np.broadcast_to(la,fl.shape)

        imin = abs(la-lmin).min(axis=0).argmin()
        imax = abs(la-lmax).min(axis=0).argmin()

        la = la[:,imin:imax]
        fl = fl[:,imin:imax]
        iv = iv[:,imin:imax]
        wd = wd[:,imin:imax]

        # replace (near) zero dispersions, which would otherwise end up
        # in the denominator of the resolution kernel below
        w = wd<1e-5
        wd[w]=2.

        # squared pixel separation for every pair of pixels
        ii = np.arange(la.shape[1])
        di = ii-ii[:,None]
        di2 = di**2
        ndiag = int(4*np.ceil(wd.max())+1)
        nbins = wd.shape[1]

        for f in fs:
            i = (f-1)
            if use_frames:
                i = i%500
            if np.all(iv[i]==0):
                print("DEBUG: skipping plate,fid = {},{} (no data)".format(plate,f))
                continue

            t = platemjdfiber2targetid(plate, mjd, f)
            if t not in dic_spectra:
                dic_spectra[t] = []
                bricknames[t] = '{}-{}'.format(plate,mjd)

            ## build resolution from wdisp
            reso = np.zeros([ndiag,nbins])
            for idiag in range(ndiag):
                offset = ndiag//2-idiag
                d = np.diagonal(di2,offset=offset)
                if offset<0:
                    reso[idiag,:len(d)] = np.exp(-d/2/wd[i,:len(d)]**2)
                else:
                    reso[idiag,nbins-len(d):nbins]=np.exp(-d/2/wd[i,nbins-len(d):nbins]**2)

            # scale each row of the resolution matrix to unit sum
            R = Resolution(reso)
            ccd = sparse.spdiags(1./R.sum(axis=1).T, 0, *R.shape)
            R = (ccd*R).todia()
            dic_spectra[t].append(Spectrum(la[i], fl[i], iv[i], R, R.tocsr()))

        print("DEBUG: read {} ".format(infile))

    # "is None": an == comparison is elementwise when an array is passed.
    if targetids is None:
        targetids = sorted(dic_spectra)

    targets = []
    kept_ids = []
    for targetid in targetids:
        spectra = dic_spectra.get(targetid, [])
        if len(spectra) > 0:
            # Add the brickname to the meta dictionary.  The keys of this
            # dictionary will end up as extra columns in the output ZBEST
            # HDU.
            tmeta = dict()
            tmeta["BRICKNAME"] = bricknames[targetid]
            tmeta["BRICKNAME_datatype"] = "S8"
            targets.append(Target(targetid, spectra, coadd=coadd, meta=tmeta))
            kept_ids.append(targetid)
        else:
            print('ERROR: Target {} on {} has no good spectra'.format(targetid, os.path.basename(spplate_name)))

    #- Create a metadata table in case we might want to add other columns
    #- in the future.  It is built from the targets actually returned so
    #- that the two outputs always line up row for row.
    metatable = Table(names=("TARGETID", "BRICKNAME"), dtype=("i8", "S8",))
    for t in kept_ids:
        metatable.add_row( (t, bricknames[t]) )

    return targets, metatable