Example #1

# Imports assumed by this example (module paths follow the specdb code base):
import pdb
import warnings
import numpy as np
from astropy import units as u
from astropy.coordinates import SkyCoord, match_coordinates_sky
from specdb import defs

def get_new_ids(maindb,
                newdb,
                idkey,
                chk=True,
                mtch_toler=None,
                pair_sep=0.5 * u.arcsec,
                close_pairs=False,
                debug=False):
    """ Generate new CAT_IDs for an input DB

    Parameters
    ----------
    maindb : Table
      Existing catalog; coordinates given by RA and DEC
    newdb : Table
      Input catalog; RA, DEC assumed to be given by RA_GROUP and DEC_GROUP
    idkey : str
      Key for the ID column
    chk : bool, optional
      Perform some checks
    mtch_toler : Quantity, optional
      Matching tolerance; typically taken from the default in defs.get_cat_dict()
    pair_sep : Angle, optional
      Separation at which a pair is considered 'real'
    close_pairs : bool, optional
      Input list includes close pairs (i.e. within mtch_toler)
    debug : bool, optional
      Drop into the debugger while handling duplicates

    Returns
    -------
    ids : ndarray (int)
      Old IDs are returned as the negative of their value;
      new IDs are generated as needed

    """
    if mtch_toler is None:
        cdict = defs.get_cat_dict()
        mtch_toler = cdict['match_toler']
    IDs = np.zeros(len(newdb), dtype=int)
    # Setup
    c_main = SkyCoord(ra=maindb['RA'], dec=maindb['DEC'], unit='deg')
    c_new = SkyCoord(ra=newdb['RA_GROUP'], dec=newdb['DEC_GROUP'], unit='deg')
    # Check for pairs in the new list
    pidx1, pidx2, pd2d, _ = c_new.search_around_sky(c_new, mtch_toler)
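    # Each source matches itself at zero separation, so only separations
    # wider than pair_sep flag a genuine pair within mtch_toler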
    pairs = pd2d > pair_sep
    if np.sum(pairs) and (not close_pairs):
        print("Input catalog includes pairs closer than {} and wider than {}".format(
            mtch_toler, pair_sep))
        raise IOError("Use close_pairs=True if appropriate")
    # Find new sources (ignoring pairs at first)
    idx, d2d, d3d = match_coordinates_sky(c_new, c_main, nthneighbor=1)
    new = d2d > mtch_toler
    # Old IDs
    IDs[~new] = -1 * maindb[idkey][idx[~new]]
    # Now deal with pairs
    if np.sum(pairs) > 0:
        # Check against catalog
        pidx, pd2d, _ = match_coordinates_sky(c_new[pidx1][pairs],
                                              c_main,
                                              nthneighbor=1)
        not_pair_match = pd2d > pair_sep
        # Flag these as new -- they will get new IDs below
        # (np.where is needed to actually modify the array in place)
        new[pidx1[pairs][np.where(not_pair_match)[0]]] = True
    # New IDs
    nnew = np.sum(new)
    new_idx = np.where(new)[0]
    newID = np.max(maindb[idkey])
    # Ingest
    if nnew == 1:
        IDs[new_idx] = newID + 1
    elif nnew > 1:  # Deal with duplicates
        sub_c_new = c_new[new]
        dup_idx, dup_d2d, _ = match_coordinates_sky(sub_c_new,
                                                    sub_c_new,
                                                    nthneighbor=2)
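        # nthneighbor=2 skips the trivial self-match, returning each new
        # source's nearest *other* new source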
        if close_pairs:
            dups = dup_d2d < pair_sep
        else:
            dups = dup_d2d < mtch_toler
        ndups = np.sum(dups)
        # Not duplicates
        IDs[new_idx[~dups]] = newID + 1 + np.arange(np.sum(~dups))
        newID = max(np.max(IDs), newID)

        # Duplicates
        if ndups > 0:
            warnings.warn(
                "Found {:d} duplicate source(s) (e.g. multiple spectra); make sure this is expected"
                .format(ndups // 2))
            # Cut down to unique and restrict to new ones (there are at least 2 duplicates per match)
            dup_idx = np.where(dups)[0]
            dup_filled = np.array([False] * len(sub_c_new))
            if debug:
                pdb.set_trace()
            for idup in dup_idx:  # Ugly loop..
                if dup_filled[idup]:  # Already filled as a duplicate
                    continue
                dcoord = sub_c_new[idup]
                sep = dcoord.separation(sub_c_new)
                if close_pairs:
                    isep = np.where(sep < pair_sep)[0]
                else:
                    isep = np.where(sep < mtch_toler)[0]
                # ID
                newID += 1
                IDs[new_idx[isep]] = newID
                dup_filled[isep] = True  # Avoids the other dup(s)
    if chk:
        print("The following sources were previously in the DB")
        print(newdb[~new])
    # Return
    return IDs
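
A minimal usage sketch (hypothetical data; the coordinates and CAT_ID values below are illustrative, and defs.get_cat_dict() must supply 'match_toler' as in specdb):

from astropy.table import Table

# Existing catalog with an integer ID column
maindb = Table()
maindb['RA'] = [150.00, 150.10]
maindb['DEC'] = [2.20, 2.30]
maindb['CAT_ID'] = [1, 2]

# Input catalog: the first source matches maindb, the second is new
newdb = Table()
newdb['RA_GROUP'] = [150.00, 150.50]
newdb['DEC_GROUP'] = [2.20, 2.50]

ids = get_new_ids(maindb, newdb, 'CAT_ID')
# ids -> array([-1, 3]): the match carries its negated old ID,
# while the unmatched source receives the next free CAT_ID
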
Example #2

# Imports assumed by this example (module aliases follow the specdb code
# base; the spbu/spzu paths for the build and zem utilities are assumed):
import pdb
import warnings
import numpy as np
from astropy.io import fits
from astropy.table import Table
from astropy.coordinates import SkyCoord, match_coordinates_sky
from IPython import embed
from linetools import utils as ltu
from specdb import defs
from specdb.build import utils as spbu
from specdb.zem import utils as spzu

def mk_meta(files, ztbl, fname=False, stype='QSO', skip_badz=False,
            mdict=None, parse_head=None, debug=False, chkz=False,
            mtbl_file=None,
            verbose=False, specdb=None, sdb_key=None, **kwargs):
    """ Generate a meta Table from an input list of files

    Parameters
    ----------
    files : list
      List of FITS files
    ztbl : Table
      Table of redshifts.  Must include RA, DEC, ZEM, ZEM_SOURCE
      Used for RA/DEC if fname=False;  then requires SPEC_FILE too
    fname : bool, optional
      Attempt to parse RA/DEC from the file name
      Format must be SDSSJ######(.##)+/-######(.#)[x]
        where x cannot be a digit, '.', '+', or '-'
    stype : str, optional
      Description of object type (e.g. 'QSO', 'Galaxy', 'SN')
    skip_badz : bool, optional
      Skip spectra without a parseable redshift (using the Myers catalog)
    mdict : dict, optional
      Input meta data in dict form, e.g. mdict=dict(INSTR='ESI')
    parse_head : dict, optional
      Parse header for meta info with this dict
    debug : bool, optional
      Print the partial meta table and hit a set_trace before returning
    chkz : bool, optional
      If any sources have no parseable redshift, hit a set_trace
      instead of raising an Exception
    mtbl_file : str, optional
      Filename of an input meta table.  Currently allowed extensions are
      _meta.ascii or _meta.fits and the file must be readable by Table.read().
      The values in this table overwrite any others generated.
      The table must include a SPEC_FILE column to link meta data
    verbose : bool, optional
      Print progress while parsing headers
    specdb : SpecDB, optional
      Database object to grab ID values from
      Requires sdb_key
    sdb_key : str, optional
      ID key in the SpecDB object
    **kwargs
      Passed to spzu.zem_from_radec()

    Returns
    -------
    meta : Table
      Meta table
    """
    if specdb is not None:
        if sdb_key is None:
            raise IOError("Must specify sdb_key if you are passing in specdb")
    #
    coordlist = []
    snames = []
    for ifile in files:
        sname = ifile.split('/')[-1]
        snames.append(sname)
        if fname:
            # Starting index
            if 'SDSSJ' in ifile:
                i0 = ifile.find('SDSSJ')+4
            else:
                i0 = ifile.rfind('J')+1
            # Find end (ugly)
            for ii in range(i0+1,99999):
                if ifile[ii] in ('0','1','2','3','4','5','6','7','8','9',
                                 '.','+','-'):
                    continue
                else:
                    i1 = ii
                    break
            # Deal with .fits
            if ifile[i1-1] == '.':
                i1 -= 1
            # Get coord
            try:
                coord = ltu.radec_to_coord(ifile[i0:i1])
            except (UnboundLocalError, ValueError):
                pdb.set_trace()  # Could not parse coordinates from the file name
        else:
            mt = np.where(ztbl['SPEC_FILE'] == sname)[0]
            if len(mt) != 1:
                raise IndexError("NO MATCH FOR {:s}".format(sname))
            coord = ltu.radec_to_coord((ztbl['RA'][mt],
                                        ztbl['DEC'][mt]))[0]
        coordlist.append(coord)
    ras = np.array([coord.ra.degree for coord in coordlist])
    decs = np.array([coord.dec.degree for coord in coordlist])
    coords = SkyCoord(ra=ras, dec=decs, unit='deg')

    # Fill
    meta = Table()
    meta['RA_GROUP'] = coords.ra.deg
    meta['DEC_GROUP'] = coords.dec.deg
    meta['STYPE'] = [str(stype)]*len(meta)

    zem, zsource, ZQ = spzu.zem_from_radec(meta['RA_GROUP'], meta['DEC_GROUP'], 
                                           ztbl, **kwargs)
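    # zem <= 0 flags entries whose redshift could not be matched in ztbl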
    badz = zem <= 0.
    if np.sum(badz) > 0:
        if skip_badz:
            warnings.warn("Skipping {:d} entries without a parseable redshift".format(
                np.sum(badz)))
            badz[:] = False  # Risky!
        else:
            if chkz:  # Turn this on to hit a stop instead of an Exception
                pdb.set_trace()
            else:
                raise ValueError("{:d} entries without a parseable redshift".format(
                    np.sum(badz)))
    meta['zem_GROUP'] = zem
    meta['sig_zem'] = 0.  # Need to add
    meta['flag_zem'] = zsource
    if ZQ is not None:
        meta['ZQ'] = ZQ

    # Cut?
    meta = meta[~badz]

    # specdb IDs
    if sdb_key is not None:
        # Initialize the ID column; matched entries are filled below
        meta[sdb_key] = [-9999]*len(meta)
        c_igmsp = SkyCoord(ra=specdb.qcat.cat['RA'], dec=specdb.qcat.cat['DEC'], unit='deg')
        c_new = SkyCoord(ra=meta['RA_GROUP'], dec=meta['DEC_GROUP'], unit='deg')
        # Match the new sources against the existing specdb catalog
        idx, d2d, d3d = match_coordinates_sky(c_new, c_igmsp, nthneighbor=1)
        cdict = defs.get_cat_dict()
        mtch = d2d < cdict['match_toler']
        meta[sdb_key][mtch] = specdb.qcat.cat[sdb_key][idx[mtch]]

    # SPEC_FILE
    meta['SPEC_FILE'] = np.array(files)[~badz]
    root_names = np.array(snames)[~badz]

    # Try Header?
    if parse_head is not None:
        # Setup to store
        plist = {}
        for key in parse_head.keys():
            plist[key] = []
        # Loop on files
        for count, sfile in enumerate(meta['SPEC_FILE']):
            if verbose:
                print('Parsing {:s}'.format(sfile))
            try:
                head = fits.open(sfile)[0].header
            except FileNotFoundError:  # Try for compressed
                head = fits.open(sfile+'.gz')[0].header

            # Call it
            _ = spbu.parse_header(parse_head, head, mdict=mdict,
                                  plist=plist, count=count)

        # Finish
        for key in plist.keys():
            try:
                meta[key] = plist[key]
            except Exception:  # Length mismatch between plist and the meta table
                embed(header='338 of private')
    # mdict
    if mdict is not None:
        for key,item in mdict.items():
            meta[key] = [item]*len(meta)

    # EPOCH
    if 'EPOCH' not in meta.keys():
        warnings.warn("EPOCH not defined.  Filling with 2000.")
        meta['EPOCH'] = 2000.

    # GROUP ID
    meta['GROUP_ID'] = np.arange(len(meta)).astype(int)

    # Fill in empty columns with warning
    mkeys = meta.keys()
    req_clms = defs.get_req_clms(sdb_key=sdb_key)
    for clm in req_clms:
        if clm not in mkeys:
            if clm not in ['NPIX','WV_MIN','WV_MAX']:  # Filled in ingest_spec
                warnings.warn("Meta Column {:s} not defined.  Filling with DUMMY".format(clm))
                if clm == 'DATE-OBS':
                    meta[clm] = ['9999-1-1']*len(meta)
                else:
                    meta[clm] = ['DUMMY']*len(meta)

    # Input meta table
    if mtbl_file is not None:
        # Read
        if '_meta.ascii' in mtbl_file:
            imtbl = Table.read(mtbl_file, format='ascii')
        elif '_meta.fits' in mtbl_file:
            imtbl = Table.read(mtbl_file)
        else:
            raise IOError("Input meta table must have either an ascii or fits extension")
        # Check length
        if len(imtbl) != len(meta):
            raise IOError("Input meta table must have same length as self-generated one")
        # Check for SPEC_FILE
        if 'SPEC_FILE' not in imtbl.keys():
            raise ValueError("Input meta table must include SPEC_FILE column")
        # Loop to get indices
        idx = []
        for row in imtbl:
            imt = np.where(root_names == row['SPEC_FILE'])[0]
            if len(imt) == 0:
                print("No match to spec file {:s}.  Will ignore".format(row['SPEC_FILE']))
            elif len(imt) == 1:
                idx.append(imt[0])
            else:
                raise ValueError("Two entries with the same SPEC_FILE.  Something went wrong..")
        idx = np.array(idx)
        # Loop on keys
        for key in imtbl.keys():
            # Skip?
            if key in ['SPEC_FILE']:
                continue
            if key in meta.keys():
                pdb.set_trace()  # Unexpected collision with a generated column
            else:
                # Add Column
                meta.add_column(imtbl[key][idx])
    # Return
    if debug:
        meta[['RA_GROUP', 'DEC_GROUP', 'SPEC_FILE']].pprint(max_width=120)
        pdb.set_trace()
    return meta
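
A minimal usage sketch (hypothetical file path and redshift values; the column requirements follow the docstring above):

from astropy.table import Table

# Redshift table: RA, DEC, ZEM, ZEM_SOURCE are required, and SPEC_FILE
# is needed here because fname=False
ztbl = Table()
ztbl['RA'] = [150.00]
ztbl['DEC'] = [2.20]
ztbl['ZEM'] = [2.5]
ztbl['ZEM_SOURCE'] = ['SDSS']
ztbl['SPEC_FILE'] = ['spec1.fits']

meta = mk_meta(['/data/spec1.fits'], ztbl, fname=False,
               stype='QSO', mdict=dict(INSTR='ESI'))
# meta now carries RA_GROUP/DEC_GROUP, zem_GROUP, SPEC_FILE, GROUP_ID,
# plus INSTR from mdict and DUMMY-filled required columns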