Exemplo n.º 1
0
def main(flg):
    """Dispatch the vetting steps selected by the bitmask ``flg``.

    Each step is enabled by one bit; enabled steps run in this order and
    their return values are discarded:

        1 -- load_ml_dr7 (test load)
        2 -- chk_pn_dla_to_ml
        4 -- chk_dr5_dla_to_ml
        8 -- dr5_false_positives
    """
    want_load = flg & 1
    want_pn = flg & 2
    want_dr5 = flg & 4
    want_false_pos = flg & 8

    # Test load
    if want_load:
        load_ml_dr7()

    # Compare PN DLAs to ML
    if want_pn:
        chk_pn_dla_to_ml()

    # Compare DR5 DLAs to ML
    if want_dr5:
        chk_dr5_dla_to_ml()

    # DR5 false positives
    if want_false_pos:
        dr5_false_positives()
Exemplo n.º 2
0
def _dr5_slls_match(dla_coord, dla_zabs, lls_coord, lls_zabs, dz_toler):
    """Return True if an ML LLS lies within 2 arcsec and dz_toler of the DLA."""
    lls_mts = np.where(dla_coord.separation(lls_coord) < 2*u.arcsec)[0]
    if len(lls_mts) == 0:  # No LLS on this sightline
        return False
    return np.min(np.abs(dla_zabs - lls_zabs[lls_mts])) < dz_toler


def _write_dr5_subset(dr5, dla_coord, sl_coord, indices, tblfile):
    """Write PLATE, FIBER, NHI and zabs of the selected DR5 DLAs to tblfile."""
    plates, fibers = [], []
    for ii in indices:
        # Closest sightline to the DLA provides the plate/fiber
        imin = np.argmin(dla_coord[ii].separation(sl_coord))
        plates.append(dr5.sightlines['PLATE'][imin])
        fibers.append(dr5.sightlines['FIB'][imin])
    tbl = Table()
    tbl['PLATE'] = plates
    tbl['FIBER'] = fibers
    tbl['NHI'] = dr5.NHI[indices]
    tbl['zabs'] = dr5.zabs[indices]
    tbl.write(tblfile, format='ascii.fixed_width', overwrite=True)


def chk_dr5_dla_to_ml(ml_dlasurvey=None, ml_llssurvey=None, dz_toler=0.015,
                      outfile='vette_dr5.json', write_again=True):
    """ Compare the SDSS DR5 DLAs to the ML results
    Save to JSON file

    Parameters
    ----------
    ml_dlasurvey, ml_llssurvey : optional
        ML DLA and LLS surveys.  If either is None, both are loaded with
        load_ml_dr7().
    dz_toler : float, optional
        Maximum |dz| for two absorbers on a sightline to count as a match
    outfile : str, optional
        JSON file that receives the results
    write_again : bool, optional
        If True, write DR5_misses.ascii and DR5_SLLS.ascii

    Output JSON keys
    ----------------
    in_ml : per DR5 sightline, True if an ML sightline lies within 2 arcsec
    dr5_idx : per DR5 DLA (after the sightline cut), index of the matched
        ML DLA; -1 for a miss, -9 when only an ML LLS matches
    not_in_dr5 : indices of ML DLAs without a DR5 DLA within 2 arcsec,
        followed by those with a positional match but no DR5 redshift
        within dz_toler
    """
    # Load ML
    if (ml_dlasurvey is None) or (ml_llssurvey is None):
        ml_llssurvey, ml_dlasurvey = load_ml_dr7()
    # Load DR5
    dr5 = DLASurvey.load_SDSS_DR5()  # This is the statistical sample
    # Use coord to efficiently deal with sightlines
    ml_coord = SkyCoord(ra=ml_dlasurvey.sightlines['RA'], dec=ml_dlasurvey.sightlines['DEC'], unit='deg')
    dr5_coord = SkyCoord(ra=dr5.sightlines['RA'], dec=dr5.sightlines['DEC'], unit='deg')
    _, d2d, _ = match_coordinates_sky(dr5_coord, ml_coord, nthneighbor=1)
    in_ml = d2d < 2*u.arcsec
    print("{:d} of the DR5 sightlines were covered by ML out of {:d}".format(np.sum(in_ml), len(dr5.sightlines)))
    # 7477 sightlines out of 7482

    # Cut down
    dr5.sightlines = dr5.sightlines[in_ml]
    new_mask = dla_stat(dr5, dr5.sightlines) # 737 good DLAs
    dr5.mask = new_mask
    dr5_dla_coord = dr5.coord
    dr5_dla_zabs = dr5.zabs
    ndr5 = len(dr5_dla_coord)

    # Grab the ML arrays once instead of on every loop iteration
    ml_dla_coord = ml_dlasurvey.coords
    ml_lls_coord = ml_llssurvey.coords
    ml_dla_zabs = ml_dlasurvey.zabs
    ml_lls_zabs = ml_llssurvey.zabs

    # Loop on DR5 DLAs and save indices of the matches
    dr5_ml_idx = np.full(ndr5, -1, dtype=int)
    for ii in range(ndr5):
        # Match to ML DLAs on the sightline
        dla_mts = np.where(dr5_dla_coord[ii].separation(ml_dla_coord) < 2*u.arcsec)[0]
        if len(dla_mts) > 0:
            zdiff = np.abs(dr5_dla_zabs[ii] - ml_dla_zabs[dla_mts])
            imin = np.argmin(zdiff)
            if zdiff[imin] < dz_toler:
                dr5_ml_idx[ii] = dla_mts[imin]
                continue
        # No DLA match (in position or redshift): check for LLS
        if _dr5_slls_match(dr5_dla_coord[ii], dr5_dla_zabs[ii],
                           ml_lls_coord, ml_lls_zabs, dz_toler):
            dr5_ml_idx[ii] = -9  # SLLS match

    # Coordinates of the sightlines that survived the cut
    cut_sl_coord = SkyCoord(ra=dr5.sightlines['RA'], dec=dr5.sightlines['DEC'], unit='deg')

    if write_again:
        # Write out misses
        misses = np.where(dr5_ml_idx == -1)[0]
        _write_dr5_subset(dr5, dr5_dla_coord, cut_sl_coord, misses, 'DR5_misses.ascii')
        # Write out SLLS
        sllss = np.where(dr5_ml_idx == -9)[0]
        _write_dr5_subset(dr5, dr5_dla_coord, cut_sl_coord, sllss, 'DR5_SLLS.ascii')

    # ML not matched by PW09?
    _, d2d2, _ = match_coordinates_sky(ml_dla_coord, dr5_dla_coord, nthneighbor=1)
    not_in_dr5 = d2d2 > 2*u.arcsec  # This doesn't match redshifts!
    might_be_in_dr5 = np.where(~not_in_dr5)[0]

    others_not_in = []  # this is some painful book-keeping
    for idx in might_be_in_dr5:  # Matching redshifts..
        imt = ml_dla_coord[idx].separation(dr5_dla_coord) < 2*u.arcsec
        # Match on dz_toler
        if np.min(np.abs(ml_dla_zabs[idx] - dr5_dla_zabs[imt])) > dz_toler:
            others_not_in.append(idx)

    # Save
    out_dict = {}
    out_dict['in_ml'] = in_ml
    out_dict['dr5_idx'] = dr5_ml_idx  # -1 are misses, -9 are SLLS
    # dtype=int keeps the result an index array even when others_not_in is empty
    out_dict['not_in_dr5'] = np.concatenate([np.where(not_in_dr5)[0],
                                             np.array(others_not_in, dtype=int)])
    ltu.savejson(outfile, ltu.jsonify(out_dict), overwrite=True)
Exemplo n.º 3
0
def chk_pn_dla_to_ml(ml_dlasurvey=None, ml_llssurvey=None, dz_toler=0.015, outfile='vette_dr7_pn.json'):
    """ Compare results of Noterdaeme to ML
    Save to JSON file

    Parameters
    ----------
    ml_dlasurvey, ml_llssurvey : optional
        ML DLA and LLS surveys.  If either is None, both are loaded with
        load_ml_dr7().
    dz_toler : float, optional
        Maximum |dz| for two absorbers on a sightline to count as a match
    outfile : str, optional
        JSON file that receives the results

    Output JSON keys
    ----------------
    in_ml : per PN row, True if an ML sightline lies within 2 arcsec
    pn_idx : per PN row kept by in_ml, index of the matched ML DLA;
        -1 for a miss, -9 when only an ML LLS matches, -99 when the PN
        system has logN_HI_ < 20.3
    not_in_pn : indices of ML DLAs with no PN entry within 2 arcsec
        (redshifts are not compared)

    PN systems missed by ML with logN_HI_ > 20.8 are also written to
    N09_missed_highNHI.ascii.
    """
    # Load ML
    if (ml_dlasurvey is None) or (ml_llssurvey is None):
        ml_llssurvey, ml_dlasurvey = load_ml_dr7()
    # Load PN
    pn_dr7_file = '../Analysis/noterdaeme_dr7.fits'
    pn_dr7 = Table.read(pn_dr7_file)

    # Use coord to efficiently deal with sightlines
    ml_coord = SkyCoord(ra=ml_dlasurvey.sightlines['RA'], dec=ml_dlasurvey.sightlines['DEC'], unit='deg')
    pn_coord = SkyCoord(ra=pn_dr7['_RA'], dec=pn_dr7['_DE'], unit='deg')
    _, d2d, _ = match_coordinates_sky(pn_coord, ml_coord, nthneighbor=1)
    in_ml = d2d < 2*u.arcsec
    print("{:d} of the PN sightlines were covered by ML out of {:d}".format(np.sum(in_ml), len(pn_dr7)))

    # Cut
    cut_pn = pn_dr7[in_ml]

    # Grab the ML DLA arrays once instead of on every loop iteration
    ml_dla_coords = ml_dlasurvey.coords
    ml_dla_plate = ml_dlasurvey.plate
    ml_dla_fiber = ml_dlasurvey.fiber
    ml_dla_zabs = ml_dlasurvey.zabs

    # Loop on PN DLAs and save indices of the matches
    pn_ml_idx = np.full(len(cut_pn), -1, dtype=int)
    for ii, pnrow in enumerate(cut_pn):
        if not (pnrow['logN_HI_'] >= 20.3):
            pn_ml_idx[ii] = -99  # Not a PN DLA
            continue
        dla_mts = np.where((ml_dla_plate == pnrow['Plate']) & (ml_dla_fiber == pnrow['Fiber']))[0]
        if len(dla_mts) > 0:
            # Redshifts of all DLAs on the sightline in ML
            zdiff = np.abs(pnrow['zabs'] - ml_dla_zabs[dla_mts])
            if np.min(zdiff) < dz_toler:
                pn_ml_idx[ii] = dla_mts[np.argmin(zdiff)]
            # NOTE: the LLS survey is only consulted when ML has no DLA at
            # all on the sightline, so a redshift mismatch stays a miss (-1)
            continue
        # Check for LLS
        lls_mts = np.where((ml_llssurvey.plate == pnrow['Plate']) & (ml_llssurvey.fiber == pnrow['Fiber']))[0]
        if len(lls_mts) > 0:
            # Redshifts of all LLS on the sightline in ML
            zdiff = np.abs(pnrow['zabs'] - ml_llssurvey.zabs[lls_mts])
            if np.min(zdiff) < dz_toler:
                pn_ml_idx[ii] = -9  # SLLS match

    # PN not matched by ML?
    misses = (pn_ml_idx == -1)
    pn_missed = cut_pn[misses]
    # Write high NHI systems to disk
    high_NHI = pn_missed['logN_HI_'] > 20.8
    pn_missed[['QSO','Plate','Fiber', 'zem', 'zabs', 'Flag', 'logN_HI_']][high_NHI].write("N09_missed_highNHI.ascii", format='ascii.fixed_width', overwrite=True)

    # ML not matched by PN?
    _, d2d2, _ = match_coordinates_sky(ml_dla_coords, pn_coord, nthneighbor=1)
    not_in_pn = d2d2 > 2*u.arcsec  # This doesn't check zabs!!

    # Save
    out_dict = {}
    out_dict['in_ml'] = in_ml
    out_dict['pn_idx'] = pn_ml_idx  # -1 are misses, -9 are SLLS, -99 are not DLAs in PN
    out_dict['not_in_pn'] = np.where(not_in_pn)[0]
    ltu.savejson(outfile, ltu.jsonify(out_dict), overwrite=True)
    print("Wrote: {:s}".format(outfile))
Exemplo n.º 4
0
def dr5_false_positives(ml_dlasurvey=None, ml_llssurvey=None):
    """ Tabulate ML DLAs on DR5 sightlines that have no DR5 counterpart

    Reads the DR5 matches from vette_dr5.json, prints the false-positive
    counts and writes FP_DR5_highNHI.ascii (NHI > 21) and
    FP_DR5_medNHI.ascii (20.6 < NHI < 21) for the false positives that
    fall inside the Z_START..Z_END range of their DR5 sightline.

    Parameters
    ----------
    ml_dlasurvey : optional
        ML DLA survey; loaded with load_ml_dr7() when None
    ml_llssurvey : optional
        Not used by this function
    """
    vette_file = 'vette_dr5.json'
    from pyigm.surveys.dlasurvey import DLASurvey
    from matplotlib import pyplot as plt
    # Load ML
    if ml_dlasurvey is None:
        _, ml_dlasurvey = load_ml_dr7()
    # Load DR5 (this is the statistical sample)
    dr5 = DLASurvey.load_SDSS_DR5()
    # Indices of the ML DLA matched to each DR5 DLA
    dr5_ml_idx = np.array(ltu.loadjson(vette_file)['dr5_idx'])

    # Which ML DLAs lie on a DR5 sightline?
    ml_dla_coord = ml_dlasurvey.coords
    sightlines = dr5.sightlines
    dr5_coord = SkyCoord(ra=sightlines['RA'], dec=sightlines['DEC'], unit='deg')
    idx, d2d, d3d = match_coordinates_sky(ml_dla_coord, dr5_coord, nthneighbor=1)
    in_dr5 = d2d < 2*u.arcsec
    print("{:d} of the ML DLA were in the DR5 sightlines".format(np.sum(in_dr5)))

    # Start with everything flagged, then clear those off the DR5
    # sightlines and those matched to a DR5 DLA
    fpos = np.array([True]*ml_dlasurvey.nsys)
    fpos[~in_dr5] = False
    fpos[dr5_ml_idx[dr5_ml_idx >= 0]] = False
    print("There are {:d} total false positives".format(np.sum(fpos)))
    # This nearly matches David's.  Will run with his analysis.

    # Restrict to the redshift range searched in DR5
    plates = ml_dlasurvey.plate
    fibers = ml_dlasurvey.fiber
    zabs = ml_dlasurvey.zabs
    zem = ml_dlasurvey.zem
    fpos_in_stat = fpos.copy()
    for idx in np.where(fpos_in_stat)[0]:
        # First DR5 sightline with the same plate and fiber
        same_spec = (sightlines['PLATE'] == plates[idx]) & (sightlines['FIB'] == fibers[idx])
        dr5_sl = np.where(same_spec)[0][0]
        inside = (zabs[idx] >= sightlines['Z_START'][dr5_sl]) & \
            (zabs[idx] <= sightlines['Z_END'][dr5_sl])
        if not inside:
            fpos_in_stat[idx] = False
    print("Number of FP in DR5 analysis region = {:d}".format(np.sum(fpos_in_stat)))

    fp_NHI = ml_dlasurvey.NHI[fpos_in_stat]
    fp_plates = plates[fpos_in_stat]
    fp_fibers = fibers[fpos_in_stat]
    fp_zabs = zabs[fpos_in_stat]
    print("Number with NHI<20.45 = {:d}".format(np.sum(fp_NHI < 20.45)))

    # High NHI
    highNHI = fp_NHI > 21.
    htbl = Table()
    htbl['PLATE'] = fp_plates[highNHI]
    htbl['FIBER'] = fp_fibers[highNHI]
    htbl['zabs'] = fp_zabs[highNHI]
    htbl['NHI'] = fp_NHI[highNHI]
    htbl.write("FP_DR5_highNHI.ascii", format='ascii.fixed_width', overwrite=True)

    # Medium NHI
    medNHI = (fp_NHI > 20.6) & (fp_NHI < 21)
    mtbl = Table()
    mtbl['PLATE'] = fp_plates[medNHI]
    mtbl['FIBER'] = fp_fibers[medNHI]
    mtbl['zabs'] = fp_zabs[medNHI]
    mtbl['zem'] = zem[fpos_in_stat][medNHI]
    mtbl['NHI'] = fp_NHI[medNHI]
    mtbl.write("FP_DR5_medNHI.ascii", format='ascii.fixed_width', overwrite=True)
Exemplo n.º 5
0
def mktab_dr7(outfil='tab_dr7_dlas.tex', ml_dlasurvey=None, sub=False):
    """ Write the LaTeX table of SDSS DR7 DLA candidates

    Systems with zabs > zem are left out of the table.  Each row is
    matched to the Shen catalog by plate and fiber to pick up BAL_FLAG,
    and carries a flag for previous detections: 0 = new, 1 = N09,
    2 = PW09, 3 = both.

    Unless sub is True, summary numbers are printed afterwards and the
    good, new, non-BAL candidates are written to new_DR7_DLAs.ascii.

    Parameters
    ----------
    outfil : str, optional
        Name of the LaTeX file to write
    ml_dlasurvey : optional
        ML DLA survey; loaded with load_ml_dr7() when None
    sub : bool, optional
        If True, stop after the first 6 rows and skip the statistics

    Raises
    ------
    ValueError
        If a DLA does not match exactly one row of the Shen catalog
    """
    # Load DLA samples
    if ml_dlasurvey is None:
        _, ml_dlasurvey = load_ml_dr7()
    # This speeds things up
    coords = ml_dlasurvey.coords
    ra = coords.ra.value
    dec = coords.dec.value

    # Load DR5 vette file
    vdr5 = ltu.loadjson('../Vetting/vette_dr5.json')
    # dtype=int keeps this a valid index array even if the list is empty
    # or was saved as floats.  Redshifts may not match
    not_in_dr5 = np.array(vdr5['not_in_dr5'], dtype=int)
    in_dr5 = np.ones(ml_dlasurvey.nsys, dtype=bool)
    in_dr5[not_in_dr5] = False

    # Load DR7 vette file
    vdr7 = ltu.loadjson('../Vetting/vette_dr7_pn.json')
    not_in_pn = np.array(vdr7['not_in_pn'], dtype=int)
    ml_in_pn = np.ones(len(ml_dlasurvey._abs_sys), dtype=bool)
    ml_in_pn[not_in_pn] = False

    # Shen (for BALs)
    shen = Table.read('dr7_bh_Nov19_2013.fits.gz')

    bals, N09 = [], []
    cnt = 0
    # The context manager closes the file even if a row fails
    with open(outfil, 'w') as tbfil:
        # Header
        tbfil.write('\\begin{table*}\n')
        tbfil.write('\\centering\n')
        tbfil.write('\\begin{minipage}{170mm} \n')
        tbfil.write('\\caption{SDSS DR7 DLA CANDIDATES$^a$\\label{tab:dr7}}\n')
        tbfil.write('\\begin{tabular}{lcccccccc}\n')
        tbfil.write('\\hline \n')
        tbfil.write(
            'RA & DEC & Plate & Fiber & \\zabs & \\nhi & Conf. & BAL$^b$ \n')
        tbfil.write('& Previous?$^c$')
        tbfil.write('\\\\ \n')
        tbfil.write('\\hline \n')

        for ii, dla in enumerate(ml_dlasurvey._abs_sys):
            if dla.zabs > dla.zem:  # RESTRICTING
                # Placeholders keep the lists aligned with the survey;
                # these systems are removed by gd_z in the stats below
                N09.append(0)
                bals.append(0)
                continue
            if sub and (cnt > 5):
                break
            cnt += 1

            # Match to shen
            mt_shen = np.where((shen['PLATE'] == dla.plate)
                               & (shen['FIBER'] == dla.fiber))[0]
            if len(mt_shen) != 1:
                raise ValueError(
                    "Expected exactly one Shen match for plate={}, fiber={}; "
                    "found {}".format(dla.plate, dla.fiber, len(mt_shen)))
            bal_flag = shen['BAL_FLAG'][mt_shen[0]]
            # Generate line
            dlac = '{:0.4f} & {:0.4f} & {:d} & {:d} & {:0.3f} & {:0.2f} & {:0.2f} & {:d}'.format(
                ra[ii], dec[ii], dla.plate, dla.fiber, dla.zabs, dla.NHI,
                dla.confidence, bal_flag)
            bals.append(bal_flag)
            # In previous survey?
            flg_prev = 0
            if ml_in_pn[ii]:
                flg_prev += 1
                N09.append(1)
            else:
                N09.append(0)
            if in_dr5[ii]:
                flg_prev += 2
            dlac += '& {:d}'.format(flg_prev)
            # End line
            tbfil.write(dlac)
            tbfil.write('\\\\ \n')

        # End Table
        tbfil.write('\\hline \n')
        tbfil.write('\\end{tabular} \n')
        tbfil.write('\\end{minipage} \n')
        tbfil.write('{$^a$}Restricted to systems with $\\mzabs < \\mzem$.\\\\ \n')
        tbfil.write(
            '{$^b$}Quasar is reported to exhibit BAL features by \\cite{shen11} (1=True).  We caution that additional BAL features exist in the purported non-BAL quasars.\\\\ \n'
        )
        tbfil.write(
            '{$^c$}DLA is new (0) or is also reported by N09 (1), PW09 (2), or both (3).\\\\ \n'
        )
        tbfil.write('\\end{table*} \n')

    print('Wrote {:s}'.format(outfil))

    if sub:
        return

    # Some stats for the paper
    gd_conf = ml_dlasurvey.confidence > 0.9
    gd_BAL = np.array(bals) == 0
    gd_z = ml_dlasurvey.zabs < ml_dlasurvey.zem
    new = (np.array(N09) == 0) & (~in_dr5)
    gd_zem = ml_dlasurvey.zem < 3.8
    gd_new = gd_BAL & gd_conf & new & gd_z

    new_dlas = Table()
    new_dlas['PLATE'] = ml_dlasurvey.plate[gd_new]
    new_dlas['FIBER'] = ml_dlasurvey.fiber[gd_new]
    new_dlas['zabs'] = ml_dlasurvey.zabs[gd_new]
    new_dlas['NHI'] = ml_dlasurvey.NHI[gd_new]
    print("There are {:d} DR7 candidates.".format(ml_dlasurvey.nsys))
    print("There are {:d} DR7 candidates not in BAL with zabs<zem.".format(
        np.sum(gd_BAL & gd_z)))
    print("There are {:d} good DR7 candidates not in BAL.".format(
        np.sum(gd_BAL & gd_conf & gd_z)))
    print("There are {:d} good DR7 candidates not in N09, PW09 nor BAL".format(
        np.sum(gd_new)))
    print(
        "There are {:d} good DR7 candidates not in N09, PW09 nor BAL and with zem<3.8"
        .format(np.sum(gd_new & gd_zem)))

    # Write out
    new_dlas.write("new_DR7_DLAs.ascii",
                   format='ascii.fixed_width',
                   overwrite=True)
Exemplo n.º 6
0
def init_for_ipython():
    """ Convenience loader for interactive sessions

    Returns
    -------
    The second of the two objects returned by load_ml_dr7(), i.e. the
    one the rest of this file binds to ml_dlasurvey.
    """
    _unused_first, dla_survey = load_ml_dr7()
    return dla_survey