import pdb
import warnings

import numpy as np

from astropy import units as u
from astropy.coordinates import SkyCoord, match_coordinates_sky
from astropy.io import fits
from astropy.table import Table

from linetools import utils as ltu

from IPython import embed

# Local package modules; exact import paths are assumed from the names used below
from specdb import defs
from specdb.build import utils as spbu
from specdb.zem import utils as spzu


def get_new_ids(maindb, newdb, idkey, chk=True, mtch_toler=None,
                pair_sep=0.5 * u.arcsec, close_pairs=False, debug=False):
    """ Generate new CAT_IDs for an input DB

    Parameters
    ----------
    maindb : Table
    newdb : Table
        RA, DEC assumed to be given by RA_GROUP and DEC_GROUP
    idkey : str
        Key for ID
    chk : bool, optional
        Perform some checks
    mtch_toler : Quantity, optional
        Matching tolerance; typically taken from the default
    pair_sep : Angle, optional
        Separation at which a pair is considered 'real'
    close_pairs : bool, optional
        Input list includes close pairs (i.e. within mtch_toler)

    Returns
    -------
    ids : ndarray (int)
        Old IDs are filled with the negative of their value
        New IDs are generated as needed
    """
    if mtch_toler is None:
        cdict = defs.get_cat_dict()
        mtch_toler = cdict['match_toler']
    IDs = np.zeros(len(newdb), dtype=int)
    # Setup
    c_main = SkyCoord(ra=maindb['RA'], dec=maindb['DEC'], unit='deg')
    c_new = SkyCoord(ra=newdb['RA_GROUP'], dec=newdb['DEC_GROUP'], unit='deg')
    # Check for pairs in the new list
    pidx1, pidx2, pd2d, _ = c_new.search_around_sky(c_new, mtch_toler)
    pairs = pd2d > pair_sep
    if np.sum(pairs) and (not close_pairs):
        print("Input catalog includes pairs closer than {:g} and wider than {:g}".format(
            mtch_toler, pair_sep))
        raise IOError("Use close_pairs=True if appropriate")
    # Find new sources (ignoring pairs at first)
    idx, d2d, d3d = match_coordinates_sky(c_new, c_main, nthneighbor=1)
    new = d2d > mtch_toler
    # Old IDs
    IDs[~new] = -1 * maindb[idkey][idx[~new]]
    # Now deal with pairs
    if np.sum(pairs) > 0:
        # Check against catalog
        pidx, pd2d, _ = match_coordinates_sky(c_new[pidx1][pairs], c_main, nthneighbor=1)
        not_pair_match = pd2d > pair_sep
        # Reset new -- It will get a new ID below -- np.where is needed to actually set new
        new[pidx1[pairs][np.where(not_pair_match)[0]]] = True
    # New IDs
    nnew = np.sum(new)
    new_idx = np.where(new)[0]
    newID = np.max(maindb[idkey])
    # Ingest
    if nnew == 1:
        IDs[new_idx] = newID + 1
    elif nnew > 1:
        # Deal with duplicates
        sub_c_new = c_new[new]
        dup_idx, dup_d2d, _ = match_coordinates_sky(sub_c_new, sub_c_new, nthneighbor=2)
        if close_pairs:
            dups = dup_d2d < pair_sep
        else:
            dups = dup_d2d < mtch_toler
        ndups = np.sum(dups)
        # Not duplicates
        IDs[new_idx[~dups]] = newID + 1 + np.arange(np.sum(~dups))
        newID = max(np.max(IDs), newID)
        # Duplicates
        if ndups > 0:
            warnings.warn("We found {:d} duplicates (e.g. multiple spectra).  Hope this was expected".format(
                ndups // 2))
            # Cut down to unique and restrict to new ones (there are at least 2 duplicates per match)
            dup_idx = np.where(dups)[0]
            dup_filled = np.array([False] * len(sub_c_new))
            if debug:
                pdb.set_trace()
            for idup in dup_idx:  # Ugly loop..
                if dup_filled[idup]:  # Already filled as a duplicate
                    continue
                dcoord = sub_c_new[idup]
                sep = dcoord.separation(sub_c_new)
                if close_pairs:
                    isep = np.where(sep < pair_sep)[0]
                else:
                    isep = np.where(sep < mtch_toler)[0]
                # ID
                newID += 1
                IDs[new_idx[isep]] = newID
                dup_filled[isep] = True  # Avoids the other dup(s)
    if chk:
        print("The following sources were previously in the DB")
        print(newdb[~new])
    # Return
    return IDs
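
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the pipeline): a minimal call to
# get_new_ids() with hand-built tables.  The column names follow the
# conventions assumed above, but the coordinates and the 'CAT_ID' key are
# invented for the example, and mtch_toler is passed explicitly to avoid
# the defs.get_cat_dict() lookup.
def _example_get_new_ids():
    maindb = Table()
    maindb['RA'] = [10.0, 20.0]
    maindb['DEC'] = [-1.0, 2.0]
    maindb['CAT_ID'] = [1, 2]
    newdb = Table()
    newdb['RA_GROUP'] = [10.0, 30.0]   # first row matches maindb; second is new
    newdb['DEC_GROUP'] = [-1.0, 5.0]
    ids = get_new_ids(maindb, newdb, 'CAT_ID', chk=False,
                      mtch_toler=1. * u.arcsec)
    # Expected: ids == [-1, 3] -- the negative of the matched CAT_ID,
    # then one freshly generated ID (max existing + 1)
    return ids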
def mk_meta(files, ztbl, fname=False, stype='QSO', skip_badz=False,
            mdict=None, parse_head=None, debug=False, chkz=False,
            mtbl_file=None, verbose=False, specdb=None, sdb_key=None,
            **kwargs):
    """ Generate a meta Table from an input list of files

    Parameters
    ----------
    files : list
        List of FITS files
    ztbl : Table
        Table of redshifts.  Must include RA, DEC, ZEM, ZEM_SOURCE
        Used for RA/DEC if fname=False; then requires SPEC_FILE too
    fname : bool, optional
        Attempt to parse RA/DEC from the file name
        Format must be SDSSJ######(.##)+/-######(.#)[x]
        where x cannot be a digit, '.', '+', or '-'
    stype : str, optional
        Description of object type (e.g. 'QSO', 'Galaxy', 'SN')
    skip_badz : bool, optional
        Skip spectra without a parseable redshift (using the Myers catalog)
    mdict : dict, optional
        Input meta data in dict form, e.g. mdict=dict(INSTR='ESI')
    parse_head : dict, optional
        Parse header for meta info with this dict
    debug : bool, optional
        Print the meta table and enter the debugger before returning
    chkz : bool, optional
        If any sources have no parseable redshift, hit a set_trace
        instead of raising an Exception
    mtbl_file : str, optional
        Filename of an input meta table.  Currently allowed extensions are
        _meta.ascii or _meta.fits and the file must be readable by Table.read().
        The values in this table will overwrite any others generated.
        Table must include a SPEC_FILE column to link meta data
    verbose : bool, optional
        Print each file name while parsing headers
    specdb : SpecDB, optional
        Database object to grab ID values from
        Requires sdb_key
    sdb_key : str, optional
        ID key in the SpecDB object

    Returns
    -------
    meta : Table
        Meta table
    """
    if specdb is not None:
        if sdb_key is None:
            raise IOError("Must specify sdb_key if you are passing in specdb")
    # Coordinates
    coordlist = []
    snames = []
    for ifile in files:
        sname = ifile.split('/')[-1]
        snames.append(sname)
        if fname:
            # Starting index
            if 'SDSSJ' in ifile:
                i0 = ifile.find('SDSSJ') + 4
            else:
                i0 = ifile.rfind('J') + 1
            # Find end (ugly)
            for ii in range(i0 + 1, 99999):
                if ifile[ii] in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                                 '.', '+', '-'):
                    continue
                else:
                    i1 = ii
                    break
            # Deal with .fits
            if ifile[i1-1] == '.':
                i1 -= 1
            # Get coord
            try:
                coord = ltu.radec_to_coord(ifile[i0:i1])
            except (UnboundLocalError, ValueError):
                pdb.set_trace()
        else:
            mt = np.where(ztbl['SPEC_FILE'] == sname)[0]
            if len(mt) != 1:
                raise IndexError("NO MATCH FOR {:s}".format(sname))
            coord = ltu.radec_to_coord((ztbl['RA'][mt], ztbl['DEC'][mt]))[0]
        coordlist.append(coord)
    ras = np.array([coord.ra.degree for coord in coordlist])
    decs = np.array([coord.dec.degree for coord in coordlist])
    coords = SkyCoord(ra=ras, dec=decs, unit='deg')

    # Fill
    meta = Table()
    meta['RA_GROUP'] = coords.ra.deg
    meta['DEC_GROUP'] = coords.dec.deg
    meta['STYPE'] = [str(stype)] * len(meta)

    zem, zsource, ZQ = spzu.zem_from_radec(meta['RA_GROUP'], meta['DEC_GROUP'],
                                           ztbl, **kwargs)
    badz = zem <= 0.
    if np.sum(badz) > 0:
        if skip_badz:
            warnings.warn("Skipping {:d} entries without a parseable redshift".format(
                np.sum(badz)))
            badz[:] = False  # Risky! -- these entries are retained, not removed
        else:
            if chkz:  # Turn this on to hit a stop instead of an Exception
                pdb.set_trace()
            else:
                raise ValueError("{:d} entries without a parseable redshift".format(
                    np.sum(badz)))
    meta['zem_GROUP'] = zem
    meta['sig_zem'] = 0.  # Need to add
    meta['flag_zem'] = zsource
    if ZQ is not None:
        meta['ZQ'] = ZQ
    # Cut?
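    # (Descriptive note: badz was built above from zem <= 0.; unless skip_badz
    # reset it to all-False, the next line drops those rows.  The same ~badz
    # mask is reused below so SPEC_FILE and root_names stay aligned with meta.)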
    meta = meta[~badz]

    # specdb IDs
    if sdb_key is not None:
        meta[sdb_key] = [-9999] * len(meta)
        c_igmsp = SkyCoord(ra=specdb.qcat.cat['RA'], dec=specdb.qcat.cat['DEC'], unit='deg')
        c_new = SkyCoord(ra=meta['RA_GROUP'], dec=meta['DEC_GROUP'], unit='deg')
        # Find matches to existing sources
        idx, d2d, d3d = match_coordinates_sky(c_new, c_igmsp, nthneighbor=1)
        cdict = defs.get_cat_dict()
        mtch = d2d < cdict['match_toler']
        meta[sdb_key][mtch] = specdb.qcat.cat[sdb_key][idx[mtch]]

    # SPEC_FILE
    meta['SPEC_FILE'] = np.array(files)[~badz]
    root_names = np.array(snames)[~badz]

    # Try Header?
    if parse_head is not None:
        # Setup to store
        plist = {}
        for key in parse_head.keys():
            plist[key] = []
        # Loop on files
        for count, sfile in enumerate(meta['SPEC_FILE']):
            if verbose:
                print('Parsing {:s}'.format(sfile))
            try:
                head = fits.open(sfile)[0].header
            except FileNotFoundError:  # Try for compressed
                head = fits.open(sfile + '.gz')[0].header
            # Call it
            _ = spbu.parse_header(parse_head, head, mdict=mdict, plist=plist,
                                  count=count)
        # Finish
        for key in plist.keys():
            try:
                meta[key] = plist[key]
            except:
                embed(header='338 of private')

    # mdict
    if mdict is not None:
        for key, item in mdict.items():
            meta[key] = [item] * len(meta)

    # EPOCH
    if 'EPOCH' not in meta.keys():
        warnings.warn("EPOCH not defined.  Filling with 2000.")
        meta['EPOCH'] = 2000.

    # GROUP ID
    meta['GROUP_ID'] = np.arange(len(meta)).astype(int)

    # Fill in empty columns with warning
    mkeys = meta.keys()
    req_clms = defs.get_req_clms(sdb_key=sdb_key)
    for clm in req_clms:
        if clm not in mkeys:
            if clm not in ['NPIX', 'WV_MIN', 'WV_MAX']:  # Filled in ingest_spec
                warnings.warn("Meta Column {:s} not defined.  Filling with DUMMY".format(clm))
                if clm == 'DATE-OBS':
                    meta[clm] = ['9999-1-1'] * len(meta)
                else:
                    meta[clm] = ['DUMMY'] * len(meta)

    # Input meta table
    if mtbl_file is not None:
        # Read
        if '_meta.ascii' in mtbl_file:
            imtbl = Table.read(mtbl_file, format='ascii')
        elif '_meta.fits' in mtbl_file:
            imtbl = Table.read(mtbl_file)
        else:
            raise IOError("Input meta table must have either an ascii or fits extension")
        # Check length
        if len(imtbl) != len(meta):
            raise IOError("Input meta table must have the same length as the self-generated one")
        # Check for SPEC_FILE
        if 'SPEC_FILE' not in imtbl.keys():
            raise ValueError("Input meta table must include a SPEC_FILE column")
        # Loop to get indices
        idx = []
        for row in imtbl:
            imt = np.where(root_names == row['SPEC_FILE'])[0]
            if len(imt) == 0:
                print("No match to spec file {:s}.  Will ignore".format(row['SPEC_FILE']))
            elif len(imt) == 1:
                idx.append(imt[0])
            else:
                raise ValueError("Two entries with the same SPEC_FILE.  Something went wrong..")
        idx = np.array(idx)
        # Loop on keys
        for key in imtbl.keys():
            # Skip?
            if key in ['SPEC_FILE']:
                continue
            if key in meta.keys():
                pdb.set_trace()
            else:  # Add the column
                meta.add_column(imtbl[key][idx])

    # Return
    if debug:
        meta[['RA_GROUP', 'DEC_GROUP', 'SPEC_FILE']].pprint(max_width=120)
        pdb.set_trace()
    return meta
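
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the pipeline): a minimal call to mk_meta()
# with a hand-built redshift table.  The file path, coordinates, and redshift
# are invented for the example, and it assumes the surrounding specdb
# environment (zem_from_radec, get_req_clms) behaves as used above.  No FITS
# file is actually read because parse_head is left as None.
def _example_mk_meta():
    ztbl = Table()
    ztbl['RA'] = [10.0]
    ztbl['DEC'] = [-1.0]
    ztbl['ZEM'] = [2.5]
    ztbl['ZEM_SOURCE'] = ['BOSS']
    ztbl['SPEC_FILE'] = ['spec1.fits']   # required because fname=False below
    # fname=False, so RA/DEC come from ztbl via SPEC_FILE matching
    meta = mk_meta(['/data/spec1.fits'], ztbl, fname=False, stype='QSO')
    # Expect one row with RA_GROUP=10.0, DEC_GROUP=-1.0, zem_GROUP=2.5,
    # plus DUMMY-filled required columns and GROUP_ID=0
    return meta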