def sum(apred='r11',
        telescope='apo25m',
        lsfs=[
            3430016, 7510018, 11130063, 14600018, 18430026, 22330043,
            25560065
        ],
        waveid=None,
        out='apogee-n',
        verbose=False,
        groups=None):
    """
    Make plots for a series of LSFs and a summary web page.

    For each LSF id in ``lsfs`` the LSF is loaded, ``parplot`` and ``group``
    are called with hardcopy base names ``pars_<id>`` and ``fwhm_<id>``, and
    one row of image file names is added to an HTML table written to
    ``out + '.html'``.

    Note: the function name shadows the builtin ``sum`` inside this module.

    Parameters
    ----------
    apred, telescope, verbose
        Passed straight to ``apload.ApLoad``.
    lsfs : sequence of int
        LSF ids to process (only iterated, never modified).
    waveid : int, optional
        If given, ``load.apWave(waveid)`` is loaded and handed to ``group``.
    out : str
        Base name of the HTML summary file.
    groups
        Passed through to ``group``.
    """
    loader = apload.ApLoad(apred=apred, telescope=telescope, verbose=verbose)

    # link prefix used in the row titles: 'ap' for apo25m, 'as' otherwise
    prefix = 'ap' if telescope == 'apo25m' else 'as'
    wave = None if waveid is None else loader.apWave(waveid)

    image_rows = []
    row_titles = []
    for lsfid in lsfs:
        lsf = loader.apLSF(lsfid)

        pars_name = 'pars_{:08d}'.format(lsfid)
        parplot(lsf, hard=pars_name)

        fwhm_name = 'fwhm_{:08d}'.format(lsfid)
        group(lsf, wave=wave, hard=fwhm_name, groups=groups)

        # file names referenced by the table; presumably written by the two
        # calls above -- verify against parplot/group
        image_rows.append([
            pars_name + '.png',
            fwhm_name + '.png',
            fwhm_name + '_r.png',
            fwhm_name + '_group.png',
        ])
        row_titles.append('<A HREF={:s}LSF-{:08d}.html>{:08d}</A>'.format(
            prefix, lsfid, lsfid))

    column_titles = ['LSF parameters', 'LSF FWHM', 'LSF R', 'LSF groups']
    html.htmltab(image_rows,
                 xtitle=column_titles,
                 ytitle=row_titles,
                 file=out + '.html')
def dr14comp(a, out=None, elem=True, domiss=False):
    """
    Comparisons to DR14.

    Loads the DR14 allStar file, hands it with ``a`` to ``plotparamdiffs``,
    and optionally reports which DR14 stars are absent from ``a``.

    Parameters
    ----------
    a
        Current allStar HDU list; ``a[1].data['APOGEE_ID']`` is read when
        ``domiss`` is set.
    out : str, optional
        Output prefix; ``'dr14_'`` is appended before it is passed on.
        With the default ``None``, ``None`` is passed through unchanged.
    elem : bool
        Passed through to ``plotparamdiffs``.
    domiss : bool
        If True, print counts of DR14 stars missing from ``a``, split by
        whether the visit name carries an integer MJD field.
    """
    apl = apload.ApLoad(dr='dr14')
    dr14 = apl.allStar()

    # The original concatenated unconditionally, which raised TypeError for
    # the default out=None.
    # NOTE(review): assumes plotparamdiffs accepts out=None -- confirm.
    prefix = None if out is None else out + 'dr14_'
    plotparamdiffs(a, dr14, out=prefix, elem=elem)

    if not domiss:
        return

    miss = set(dr14[1].data['APOGEE_ID']) - set(a[1].data['APOGEE_ID'])
    print('{:d} stars in DR14 missing from current data'.format(len(miss)))

    bad = []      # missing stars with a visit whose MJD field is > 55800
    bad1m = []    # missing stars with a visit whose MJD field is not an integer
    for m in miss:
        j = np.where(dr14[1].data['APOGEE_ID'] == m)[0]
        for v, jj in zip(dr14[1].data['VISITS'][j], j):
            for vv in v.split(','):
                # third dash-separated field of the visit name
                mjd = vv.split('-')[2]
                try:
                    mjd_value = int(mjd)
                except ValueError:
                    # non-numeric field: counted under the '1m' total below
                    bad1m.append(m)
                    continue
                if mjd_value > 55800:
                    bad.append(m)
    print('not 1m', len(bad), len(set(bad)))
    print('1m', len(bad1m), len(set(bad1m)))
def compCframe(plate,
               frame,
               apred='test',
               ratio=True,
               rows=range(300),
               yr=None,
               hdu=1):
    """
    Compare new apCframe data for one frame against the r8 reduction.

    One panel per chip is drawn. For every row in ``rows`` either the
    new/old ratio is plotted (``ratio=True``, fixed y range [0, 1.5]) or the
    new spectrum, the old spectrum and their difference (``ratio=False``,
    y range ``yr``).

    Parameters
    ----------
    plate : int
        Plate number, used to load the new data and in the r8 path.
    frame : int
        Exposure number; the MJD is derived as ``55562 + frame // 10000``.
    apred : str
        Reduction version passed to ``apload.ApLoad``.
    ratio : bool
        Select ratio plot versus overlay-plus-difference plot.
    rows : iterable of int
        Rows of the image to plot.
    yr : list, optional
        y range for the non-ratio plots.
    hdu : int
        HDU index read from both the new and the old files.
    """
    load = apload.ApLoad(apred=apred)
    mjd = 55562 + int(frame // 10000)
    new = load.apCframe('M67', plate, mjd, frame)
    old = {}
    fig, ax = plots.multi(1, 3, hspace=0.001)
    x = np.arange(2048)
    for ichip, chip in enumerate(chips):
        # Frame number is zero-padded to 8 digits, matching the apCframe
        # path built in comp(); '{:d}' gave a wrong name for frame ids
        # shorter than 8 digits.
        old[chip] = fits.open(
            os.environ['APOGEE_REDUX'] +
            '/r8/apo25m/{:d}/{:d}/apCframe-{:s}-{:08d}.fits'.format(
                plate, mjd, chip, frame))
        new_data = new[chip][hdu].data
        old_data = old[chip][hdu].data
        for row in rows:
            if ratio:
                plots.plotl(ax[ichip],
                            x,
                            new_data[row, :] / old_data[row, :],
                            yr=[0, 1.5])
            else:
                plots.plotl(ax[ichip], x, new_data[row, :], yr=yr)
                plots.plotl(ax[ichip], x, old_data[row, :], yr=yr)
                plots.plotl(ax[ichip],
                            x,
                            new_data[row, :] - old_data[row, :],
                            yr=yr)
def dr13dr12():
    '''
    Compare DR13 and DR12 Teff.

    Matches the two allStar tables on APOGEE_ID and plots the Teff
    difference (DR13 - DR12) against DR13 [M/H] (top panel, colored by
    DR13 Teff) and against DR13 Teff (bottom panel, colored by DR13 [M/H]).
    '''
    older = apload.ApLoad(dr='dr12').allStar()[1].data
    newer = apload.ApLoad(dr='dr13').allStar()[1].data

    # restrict both tables to the stars they have in common, in matched order
    idx_old, idx_new = match.match(older['APOGEE_ID'], newer['APOGEE_ID'])
    older = older[idx_old]
    newer = newer[idx_new]

    fig, ax = plots.multi(1, 2, hspace=0.001, wspace=0.001)
    plots.plotc(ax[0],
                newer['M_H'],
                newer['TEFF'] - older['TEFF'],
                newer['TEFF'],
                xr=[-2.5, 0.75],
                yr=[-300, 300],
                zr=[3500, 5000])
    plots.plotc(ax[1],
                newer['TEFF'],
                newer['TEFF'] - older['TEFF'],
                newer['M_H'],
                xr=[6500, 3000],
                yr=[-300, 300],
                zr=[-2, 0.5])
def comp1d(frame, apred='test', rows=range(300)):
    """
    Plot the ratio of new to r8 ap1D spectra for one frame.

    One panel per chip; for every row in ``rows`` the new HDU-1 data divided
    by the r8 HDU-1 data is plotted with a fixed y range of [0, 1.5].

    Parameters
    ----------
    frame : int
        Exposure number; the MJD is derived as ``55562 + frame // 10000``.
    apred : str
        Reduction version passed to ``apload.ApLoad`` for the new data.
    rows : iterable of int
        Rows to plot.
    """
    load = apload.ApLoad(apred=apred)
    new = load.ap1D(frame)
    old = {}
    mjd = 55562 + int(frame // 10000)
    fig, ax = plots.multi(1, 3, hspace=0.001)
    x = np.arange(2048)
    for ichip, chip in enumerate(chips):
        # Frame number is zero-padded to 8 digits, as done for the apCframe
        # path in comp(); '{:d}' gave a wrong name for frame ids shorter
        # than 8 digits.
        old[chip] = fits.open(
            os.environ['APOGEE_REDUX'] +
            '/r8/red/{:d}/ap1D-{:s}-{:08d}.fits'.format(mjd, chip, frame))
        new_data = new[chip][1].data
        old_data = old[chip][1].data
        for row in rows:
            plots.plotl(ax[ichip],
                        x,
                        new_data[row, :] / old_data[row, :],
                        yr=[0, 1.5])
def kurucz_marcs():
    """
    Side-by-side Teff / log g diagrams for two DR13 allStar files.

    The left panel uses the default DR13 allStar file, the right panel the
    one loaded after switching the loader's ``aspcap`` attribute to
    ``'l30g'`` (presumably the MARCS-based results, going by the function
    name -- confirm). Both are cut at SNR > 150 and colored by FPARAM
    column 3, labelled [M/H] on the colorbar. Output: ``kurucz_marcs.pdf``.
    """

    def high_snr(table):
        # keep only rows with SNR > 150
        return table[np.where(table['SNR'] > 150)[0]]

    loader = apload.ApLoad(dr='dr13')
    left = high_snr(loader.allStar()[1].data)
    loader.aspcap = 'l30g'
    right = high_snr(loader.allStar()[1].data)

    fig, ax = plots.multi(2, 1, wspace=0.001)
    axim = plots.plotc(ax[0],
                       left['FPARAM'][:, 0],
                       left['FPARAM'][:, 1],
                       left['FPARAM'][:, 3],
                       xr=[4200, 3000],
                       yr=[5, -1],
                       zr=[-2, 0.5],
                       xt=r'T$_{\rm eff}$',
                       yt='log g',
                       rasterized=True)
    plots.plotc(ax[1],
                right['FPARAM'][:, 0],
                right['FPARAM'][:, 1],
                right['FPARAM'][:, 3],
                xr=[4200, 3000],
                yr=[5, -1],
                zr=[-2, 0.5],
                xt=r'T$_{\rm eff}$',
                rasterized=True)

    # shrink tick labels and axis labels on both panels
    for panel in (ax[0], ax[1]):
        for tick in panel.get_xticklabels() + panel.get_yticklabels():
            tick.set_fontsize(10)
        panel.xaxis.label.set_size(10)
        panel.yaxis.label.set_size(10)

    # single shared colorbar in its own axes at the right edge
    cbaxes = fig.add_axes([0.91, 0.1, 0.01, 0.8])
    colorbar = plt.colorbar(axim, cax=cbaxes)
    colorbar.set_label('[M/H]')
    cbaxes.tick_params(axis='both', labelsize=10)
    cbaxes.yaxis.label.set_size(10)
    fig.savefig('kurucz_marcs.pdf')
def comp(plate=7267, mjd=56654, fiber=150, frame=10920059, field='M67'):
    """
    Compare r11 apVisit / apCframe / ap1D data against the r8 reduction.

    Two figures are made:

    * a 1x3 figure (one panel per chip) of the r11/r8 apVisit flux ratio,
      drawn once as-is and once with flagged pixels set to NaN;
    * a 3x3 figure (one row per chip) showing the apCframe spectra for the
      fiber overlaid, the apCframe ratio, and the ap1D ratio.

    Parameters
    ----------
    plate, mjd, fiber : int
        Identify the visit spectrum; row ``300 - fiber`` is read from the
        2D products.
    frame : int
        Exposure number of the ap1D / apCframe files.
    field : str
        Field name passed to ``apCframe``.
    """
    r11 = apload.ApLoad(apred='r11')
    v = r11.apVisit(plate, mjd, fiber)
    a = r11.ap1D(frame)
    c = r11.apCframe(field, plate, mjd, frame)

    redux = os.environ['APOGEE_REDUX']
    v14 = fits.open(
        redux + '/r8/apo25m/{:d}/{:d}/apVisit-r8-{:d}-{:d}-{:03d}.fits'.format(
            plate, mjd, plate, mjd, fiber))
    a14 = {}
    c14 = {}
    for chip in chips:
        # Both file names use the 8-digit zero-padded frame number; the ap1D
        # path previously used '{:d}', inconsistent with the apCframe path
        # below for the same frame.
        a14[chip] = fits.open(
            redux +
            '/r8/red/{:d}/ap1D-{:s}-{:08d}.fits'.format(mjd, chip, frame))
        c14[chip] = fits.open(
            redux + '/r8/apo25m/{:d}/{:d}/apCframe-{:s}-{:08d}.fits'.format(
                plate, mjd, chip, frame))

    # --- apVisit comparison -------------------------------------------------
    fig, ax = plots.multi(1, 3, hspace=0.01)
    x = np.arange(4096)
    pixmask = bitmask.PixelBitMask()
    for ichip, chip in enumerate(chips):
        # y is a view into v[1].data, so the masking below modifies the
        # loaded apVisit array in place (after the unmasked ratio is drawn)
        y = v[1].data[ichip, :]
        plots.plotl(ax[ichip], x, v[1].data[ichip, :] / v14[1].data[ichip, :])
        flags = v[3].data[ichip, :]
        bd = np.where(((flags & pixmask.badval()) > 0)
                      | ((flags & pixmask.getval('SIG_SKYLINE')) > 0))[0]
        y[bd] = np.nan
        plots.plotl(ax[ichip], x, y / v14[1].data[ichip, :])

    # --- apCframe / ap1D comparison ------------------------------------------
    fig, ax = plots.multi(3, 3, hspace=0.01)
    x = np.arange(2048)
    row = 300 - fiber
    for ichip, chip in enumerate(chips):
        plots.plotl(ax[ichip, 0], x, c[chip][1].data[row, :])
        plots.plotl(ax[ichip, 0], x, c14[chip][1].data[row, :])
        plots.plotl(ax[ichip, 1], x,
                    c[chip][1].data[row, :] / c14[chip][1].data[row])
        plots.plotl(ax[ichip, 2], x,
                    a[chip][1].data[row, :] / a14[chip][1].data[row])
# set up a global file loader for all modules with a default version # The version/telescope can then be modified by any routine as needed from apogee.utils import apload load = apload.ApLoad()
import matplotlib.pyplot as plt
import numpy as np
from astropy.io import fits
from tools import plots
from sdss import yanny
from apogee.utils import apload
import os
import pdb

# Detector chip names, in the order used to index per-chip products.
chips = ['a', 'b', 'c']
# One plot color per chip.
colors = ['r', 'g', 'b']

# APOGEE-N
fig, ax = plots.multi(1, 3, hspace=0.001, sharex=True, sharey=True)
t11 = apload.ApLoad(apred='t11')
# ap1D data for one hard-coded exposure, indexed by chip name below
b = t11.ap1D(3190056)
# plug map for the same observation, read from $MAPPER_DATA
plug = yanny.yanny(os.environ['MAPPER_DATA'] +
                   '/55880/plPlugMapM-5585-55880-01.par')
objType = np.array(plug['PLUGMAPOBJ']['objType'])
fibers = np.array(plug['PLUGMAPOBJ']['fiberId'])
# plug-map entries flagged as spectrophotometric standards
tel = np.where(objType == 'SPECTROPHOTO_STD')[0]
# fiber id -> row index in the 2D arrays
rows = 300 - fibers[tel]
# per-row median flux on each chip
amed = np.median(b['a'][1].data[rows, :], axis=1)
bmed = np.median(b['b'][1].data[rows, :], axis=1)
cmed = np.median(b['c'][1].data[rows, :], axis=1)
# chip-to-chip normalizations relative to chip 'b' ...
anorm = np.median(bmed / amed)
cnorm = np.median(bmed / cmed)
# NOTE(review): ... immediately overridden with unity, so the medians above
# are currently unused -- presumably a deliberate switch-off; confirm.
anorm = 1.
cnorm = 1.
# samples per chip and per row entering the fit
npix = 190
# design matrix: 3 * npix rows per selected fiber, 5 + len(rows) columns
# (presumably shared terms plus one offset per fiber -- the code that fills
# it is not visible here)
design = np.zeros([3 * npix * len(rows), 5 + len(rows)])
def getresp(plate, mjd, apred='r12', telescope='apo25m', plot=False):
    """
    Solve for response function and apply it.

    A 4th-order polynomial in ``wavelength - 16000`` (no constant term) plus
    one additive offset per star is fit by linear least squares to
    log10(flux) of the fibers whose OBJTYPE is ``'HOT_STD'``. The fitted
    coefficients are turned into a response with ``norm(w, coef)``, which
    divides every row of the apPlate files and every apVisit file. All of
    them are overwritten in place on disk.

    Parameters
    ----------
    plate, mjd : int
        Identify the apPlate / apVisit files.
    apred, telescope : str
        Passed to ``apload.ApLoad``.
    plot : bool
        If True, show diagnostic plots for every row and drop into ``pdb``
        after each one (interactive use only).
    """
    # get apPlate file
    load = apload.ApLoad(apred=apred, telescope=telescope)
    apPlate = load.apPlate(plate, mjd)

    # get rows of tellurics
    fibers = apPlate['b'][11].data['FIBERID']
    tel = np.where(apPlate['b'][11].data['OBJTYPE'] == 'HOT_STD')[0]
    rows = 300 - fibers[tel]

    # do polynomial fit to log(flux), with 4th order plus offset for each
    # star, using every 10th pixel in each chip, so we have
    # 190 pixels * 3 chips * ntelluric data points and 4 + ntellurics
    # parameters
    npix = 190
    nstars = len(rows)
    design = np.zeros([3 * npix * nstars, 4 + nstars])
    y = np.zeros([3 * npix * nstars])
    pixels = slice(100, 2000, 10)  # the npix sampled pixels of each row
    for ichip, chip in enumerate(chips):
        # wavelength offset depends only on the chip: computed once per chip
        # instead of once per star
        x = apPlate[chip][4].data - 16000.
        flux = apPlate[chip][1].data
        for irow, row in enumerate(rows):
            start = irow * 3 * npix + ichip * npix
            block = slice(start, start + npix)
            xs = x[row, pixels]
            design[block, 0] = xs**4
            design[block, 1] = xs**3
            design[block, 2] = xs**2
            design[block, 3] = xs
            design[block, 4 + irow] = 1.
            y[block] = np.log10(flux[row, pixels])

    # drop samples whose log10(flux) is not finite
    gd = np.where(np.isfinite(y))[0]
    design = design[gd, :]
    y = y[gd]

    # do the fit (normal equations)
    coef = np.linalg.solve(np.dot(design.T, design), np.dot(design.T, y))

    # apply the fit. Note that norm adds a term so that response gives
    # 1/lambda**-2 shape
    for chip in chips:
        for row in np.arange(300):
            print(chip, row)
            w = apPlate[chip][4].data[row, :]
            spec = apPlate[chip][1].data[row, :]
            resp = norm(w, coef)
            if plot:
                plt.plot(w, spec)
                plt.plot(w, resp * 1000)
                plt.plot(w, spec / resp)
                plt.show()
            apPlate[chip][1].data[row, :] /= resp
            if plot:
                pdb.set_trace()
                plt.clf()
        file = load.filename('Plate',
                             plate=plate,
                             mjd=mjd,
                             apred=apred,
                             chips=True).replace('Plate-',
                                                 'Plate-' + chip + '-')
        apPlate[chip].writeto(file, overwrite=True)

    # now do the apVisit files
    for row in np.arange(300):
        try:
            print('Row: ', row)
            apVisit = load.apVisit(plate, mjd, 300 - row)
            for ichip in range(3):
                print(ichip)
                w = apVisit[4].data[ichip, :]
                resp = norm(w, coef)
                apVisit[1].data[ichip, :] /= resp
            file = load.filename('Visit',
                                 plate=plate,
                                 mjd=mjd,
                                 apred=apred,
                                 fiber=300 - row)
            print(file)
            apVisit.writeto(file, overwrite=True)
            print('done')
        except Exception as err:
            # Still best-effort per fiber, but no longer silent and no
            # longer swallowing KeyboardInterrupt / SystemExit.
            print('skipping fiber', 300 - row, ':', repr(err))
def dr_compare():
    """
    Compare stellar parameters and abundances between DR12, DR13 and DR14.

    Stars with SNR > 150 are matched pairwise on APOGEE_ID. Three PDF files
    are written:

    * ``drcomp_uncal.pdf`` / ``drcomp_cal.pdf``: 7 rows (one per parameter)
      by 3 columns (DR13-DR12, DR14-DR12, DR14-DR13) of differences in
      FPARAM / PARAM against Teff, colored by column 3 of the parameter
      array of the reference release;
    * ``drcomp_elem.pdf``: the same three columns for 14 elements.
    """

    def high_snr(dr):
        # load the allStar table of a release and keep SNR > 150
        table = apload.ApLoad(dr=dr).allStar()[1].data
        return table[np.where(table['SNR'] > 150)[0]]

    # load the DRs, select stars with SN>150
    dr12 = high_snr('dr12')
    dr13 = high_snr('dr13')
    dr14 = high_snr('dr14')
    c = apload.allStar()[3].data

    # match them
    m1a, m2a = match.match(dr12['APOGEE_ID'], dr13['APOGEE_ID'])
    m1b, m2b = match.match(dr12['APOGEE_ID'], dr14['APOGEE_ID'])
    m1c, m2c = match.match(dr13['APOGEE_ID'], dr14['APOGEE_ID'])

    # one entry per figure column:
    # (title, reference table, newer table, reference index, newer index)
    panels = (
        ('DR13-DR12', dr12, dr13, m1a, m2a),
        ('DR14-DR12', dr12, dr14, m1b, m2b),
        ('DR14-DR13', dr13, dr14, m1c, m2c),
    )

    def label_columns(axes, irow):
        # column headings, written just above the given row of panels
        for icol, panel in enumerate(panels):
            axes[irow, icol].text(0.5,
                                  1.0,
                                  panel[0],
                                  transform=axes[irow, icol].transAxes,
                                  ha='center',
                                  va='bottom')

    # parameter figures
    figu, axu = plots.multi(3, 7, hspace=0.001, wspace=0.001)
    figc, axc = plots.multi(3, 7, hspace=0.001, wspace=0.001)
    tit = [
        r'T$_{\rm eff}$', 'log g', r'V$_{\rm micro}$', '[M/H]', '[C/M]',
        '[N/M]', r'[$\alpha$/M]'
    ]
    for iparam in range(7):
        print(iparam)
        for param, ax in (('FPARAM', axu), ('PARAM', axc)):
            yt = r'$\Delta$' + tit[iparam]
            # x label only on the bottom row
            xt = r'T$_{\rm eff}$' if iparam == 6 else None
            if iparam == 0:
                label_columns(ax, iparam)

            # y range depends on the parameter
            if iparam == 0:
                yr = [-300, 300]
            elif iparam == 1:
                yr = [-0.5, 0.5]
            else:
                yr = [-0.3, 0.3]
            xr = [3500, 6000]

            for icol, (_, ref, new, iref, inew) in enumerate(panels):
                delta = new[param][inew, iparam] - ref[param][iref, iparam]
                if icol == 0:
                    # only the first column gets a y label; its mappable is
                    # kept for the colorbars
                    axim = plots.plotc(ax[iparam, icol],
                                       ref['TEFF'][iref],
                                       delta,
                                       ref[param][iref, 3],
                                       size=1,
                                       xr=xr,
                                       yr=yr,
                                       zr=[-1, 0.5],
                                       yt=yt,
                                       xt=xt,
                                       rasterized=True)
                else:
                    plots.plotc(ax[iparam, icol],
                                ref['TEFF'][iref],
                                delta,
                                ref[param][iref, 3],
                                size=1,
                                xr=xr,
                                yr=yr,
                                zr=[-1, 0.5],
                                xt=xt,
                                rasterized=True)
                plots.plotl(ax[iparam, icol], xr, [0., 0.], ls=':')

            for icol in range(3):
                ax[iparam, icol].tick_params(axis='both', labelsize=8)

    # add colorbar
    for fig in [figu, figc]:
        cbaxes = fig.add_axes([0.91, 0.1, 0.01, 0.8])
        cb = plt.colorbar(axim, cax=cbaxes)
        cb.set_label('[M/H]')
        cbaxes.tick_params(axis='both', labelsize=8)
    figu.savefig('drcomp_uncal.pdf')
    figc.savefig('drcomp_cal.pdf')
    plots.close()

    # abundance figure
    fig, ax = plots.multi(3, 14, hspace=0.001, wspace=0.001, figsize=(8, 32))
    element_names = [
        'C', 'N', 'O', 'Na', 'Mg', 'Al', 'Si', 'S', 'K', 'Ca', 'Ti', 'V',
        'Mn', 'Ni'
    ]
    for ielem, el in enumerate(element_names):
        print(el)
        yt = r'$\Delta$' + el
        xt = r'T$_{\rm eff}$' if ielem == 13 else None
        if ielem == 0:
            label_columns(ax, ielem)
        yr = [-0.5, 0.5]
        tag = el.upper()

        for icol, (_, ref, new, iref, inew) in enumerate(panels):
            if ref is dr12:
                # for DR12 the ratio to Fe is formed from the X_H and FE_H
                # columns
                ref_abun = dr12[tag + '_H'][iref] - dr12['FE_H'][iref]
            else:
                ref_abun = ref[tag + '_FE'][iref]
            new_abun = new[tag + '_FE'][inew]
            gd = np.where((ref_abun > -99) & (new_abun > -99))[0]

            # y label on the first column only
            extra = {'yt': yt} if icol == 0 else {}
            plots.plotc(ax[ielem, icol],
                        ref['TEFF'][iref[gd]],
                        new_abun[gd] - ref_abun[gd],
                        ref['PARAM'][iref[gd], 3],
                        size=1,
                        xr=[3500, 6000],
                        yr=yr,
                        zr=[-1, 0.5],
                        xt=xt,
                        nytick=5,
                        rasterized=True,
                        **extra)
            # xr is still the [3500, 6000] list left over from the parameter
            # loop above
            plots.plotl(ax[ielem, icol], xr, [0., 0.], ls=':')
            ax[ielem, icol].tick_params(axis='both', labelsize=8)

    cbaxes = fig.add_axes([0.91, 0.1, 0.01, 0.8])
    cb = plt.colorbar(axim, cax=cbaxes)
    cb.set_label('[M/H]')
    cbaxes.tick_params(axis='both', labelsize=8)
    for item in (cbaxes.get_xticklabels() + cbaxes.get_yticklabels()):
        item.set_fontsize(8)
    fig.savefig('drcomp_elem.pdf')
from apogee.utils import apload
from pyvista import tv
import matplotlib.pyplot as plt
from tools import plots
import numpy as np
import pdb


def lsfsum(y):
    """
    Print sums of the LSF array at a few columns.

    For columns 500, 1000, 1500 and 2000, prints the sum over the first axis
    of ``y[1].data[:, 150, col]``.

    NOTE(review): the same sum is printed three times per column; the three
    values were presumably meant to differ (e.g. different fibers) --
    confirm.
    """
    for col in range(500, 2500, 500):
        print(y[1].data[:, 150, col].sum(), y[1].data[:, 150, col].sum(),
              y[1].data[:, 150, col].sum())


# Script part: load four LSFs from the r11 reduction and print their sums,
# one chip at a time.
load = apload.ApLoad(apred='r11')
fig, ax = plots.multi(4, 3, hspace=0.001, wspace=0.001)
pfig, pax = plots.multi(9, 3)
for ichip, chip in enumerate(['a', 'b', 'c']):
    #t=tv.TV()
    # each apLSF call is repeated for every chip and indexed by chip name
    y1 = load.apLSF(3430016)[chip]
    print('y1: ')
    lsfsum(y1)
    y2 = load.apLSF(7510018)[chip]
    print('y2: ')
    lsfsum(y2)
    y3 = load.apLSF(11130063)[chip]
    print('y3: ')
    lsfsum(y3)
    y4 = load.apLSF(14600018)[chip]
    print('y4: ')
    lsfsum(y4)
def merge(planfile, fields=None, outfile=None, clobber=True):
    '''
    Match Cannon results to existing allStar file to make new table.

    The output table has one row per allStar row. Its columns are those of
    the first cannonField file (minus the spectra-sized ones), initialised
    to ``''`` for string columns and ``-9999.`` otherwise, then filled field
    by field for stars matched on APOGEE_ID. Rows whose Cannon ``chi_sq`` is
    <= 0 are reset to the bad value.

    Parameters
    ----------
    planfile : str
        Yanny plan file giving apred/apstar/aspcap/results versions.
    fields : str, optional
        Glob pattern of cannonField files; default ``'*/cannonField*.fits'``.
    outfile : str, optional
        Output FITS name; default ``'allStarCannon-<results>.fits'``.
    clobber : bool
        Passed as ``overwrite`` when writing the output.

    Returns
    -------
    astropy Table
        The merged table that was written.
    '''
    p = yanny.yanny(planfile, np=True)
    apred = p['apred_vers'].strip("'")
    apstar = p['apstar_vers'].strip("'")
    aspcap_vers = p['aspcap_vers'].strip("'")
    results = p['results_vers'].strip("'")
    apl = apload.ApLoad(apred=apred,
                        apstar=apstar,
                        aspcap=aspcap_vers,
                        results=results)
    a = apl.allStar()[1].data
    t = Table(a)

    out = Table()
    out['APOGEE_ID'] = t['APOGEE_ID']
    length = len(out)

    if fields is None:
        fields = glob('*/cannonField*.fits')
    else:
        fields = glob(fields)

    def is_string(value):
        # np.string_ no longer exists in NumPy 2 and an exact type() test
        # misses str values; numpy string scalars subclass bytes / str
        return isinstance(value, (bytes, str))

    # columns of the Cannon files that are not copied into the summary
    skipped = ('APOGEE_ID', 'model_flux', 'fvec', 'flux', 'ivar')

    # the first file defines the output columns
    c = fits.open(fields[0])[1].data
    for i, name in enumerate(c.names):
        print(name)
        if name not in skipped:
            out.add_column(Column(name=name, dtype=c.dtype[i], length=length))
            print(name, type(out[name][0]))
            if is_string(out[name][0]):
                print('str!')
                out[name] = ''
            else:
                out[name] = -9999.

    # add X_M tag
    #out.add_column(Column(name='X_M',dtype='{:d}f4'.format(len(a['X_M'])),length=length))

    for field in fields:
        print('field', field)
        c = fits.open(field)[1].data
        # allStar rows of this field, then match them to the Cannon rows
        j1 = np.where(a['FIELD'] == c['FIELD'][0])[0]
        i1, i2 = match.match(a['APOGEE_ID'][j1], c['APOGEE_ID'])
        # matched stars whose fit is flagged by a non-positive chi^2
        bd = np.where(c['chi_sq'][i2] <= 0.)[0]
        print(len(bd))
        for name in out.columns:
            out[name][j1[i1]] = c[name][i2]
            bad = '' if is_string(c[name][0]) else -9999.
            # '!=' rather than 'is not': identity comparison against a
            # string literal is implementation-dependent
            if name != 'APOGEE_ID':
                out[name][j1[i1[bd]]] = bad

    # for DR14, "fix" NA_H
    bd = np.where(out['NA_H'] < -1)[0]
    out['NA_H'][bd] = -9999.

    out['CANNON_ID'] = t['ASPCAP_ID']

    if outfile is None:
        outfile = 'allStarCannon-' + results + '.fits'

    prihdr = fits.Header()
    # universal_newlines=True makes check_output return text, so that
    # strip('\n') and the concatenation work on Python 3 as well
    prihdr['HISTORY'] = 'IDLWRAP_VERSION: ' + subprocess.check_output(
        'idlwrap_version', universal_newlines=True).strip('\n')
    prihdu = fits.PrimaryHDU(header=prihdr)
    hdu = fits.BinTableHDU.from_columns(np.array(out))
    hdulist = fits.HDUList([prihdu, hdu])
    hdulist.writeto(outfile, overwrite=clobber)
    ##out.write(outfile,overwrite=clobber)
    return out
def fit(planfile,
        model_name=None,
        spectrum_filenames=None,
        threads=8,
        clobber=True,
        from_filename=False,
        fit_velocity=False,
        chunk_size=1000,
        output_suffix=None,
        **kwargs):
    """
    Fit a series of spectra.

    For every field listed in the plan file, the stars whose allStar FPARAM
    values lie inside the configured logg/teff/mh/alpha ranges are fit with
    a previously trained model. One FITS file per star and one summary
    ``cannonField`` table per field are written.

    Parameters
    ----------
    planfile : str
        Yanny plan file; supplies the version strings, ``ncpus``, model name,
        output suffix, parameter ranges and the list of fields.
    model_name : str, optional
        Model base name; defaults to the plan value or
        ``'apogee-dr14-giants'``.
    spectrum_filenames, threads
        Accepted for compatibility; both are overwritten inside the function
        (``threads`` by the plan's ``ncpus``, default 16).
    clobber : bool
        If False, spectra whose output pickle name already exists are
        skipped; also used as ``overwrite`` for the summary table.
    from_filename, **kwargs
        Unused.
    fit_velocity : bool
        Passed to ``model.fit`` as ``model_redshift``.
    chunk_size : int
        When this many spectra have been read, they are fit and written as
        pickles immediately. Spectra handled this way do not enter the
        summary table.
    output_suffix : str, optional
        Suffix of the output names; defaults to the plan value or
        ``'result'``.

    Returns
    -------
    None
    """
    p = yanny.yanny(planfile, np=True)
    apred = p['apred_vers'].strip("'")
    apstar = p['apstar_vers'].strip("'")
    aspcap = p['aspcap_vers'].strip("'")
    results = p['results_vers'].strip("'")
    threads = int(getval(p, 'ncpus', '16'))
    cannon = getval(p, 'cannon_vers', 'cannon_aspcap')
    if model_name is None:
        model_name = getval(p, 'model_name', 'apogee-dr14-giants')
    if output_suffix is None:
        output_suffix = getval(p, 'output_suffix', 'result')
    logg = getrange(getval(p, 'logg', '-1 3.9'))
    teff = getrange(getval(p, 'teff', '3500 5500'))
    mh = getrange(getval(p, 'mh', '-3. 1.'))
    alpha = getrange(getval(p, 'alpha', '-0.5 1.'))

    root = os.environ[
        'APOGEE_REDUX'] + '/' + apred + '/' + apstar + '/' + aspcap + '/' + results + '/' + cannon + '/'

    model = tc.load_model(os.path.join(root, "{}.model".format(model_name)),
                          threads=threads)
    assert model.is_trained
    label_names = model.vectorizer.label_names
    # columns 0 and 2 of the '.initial' table written by train()
    mean_labels = Table.read(os.path.join(root,
                                          "{}.initial".format(model_name)),
                             format='ascii')['col0']
    sig_labels = Table.read(os.path.join(root,
                                         "{}.initial".format(model_name)),
                            format='ascii')['col2']
    #mean_labels = np.loadtxt(os.path.join(root, "{}.initial".format(model_name)))

    logger = logging.getLogger("AnniesLasso")

    # get allStar file for initial labels
    apl = apload.ApLoad(apred=apred,
                        apstar=apstar,
                        aspcap=aspcap,
                        results=results)
    allstar = apl.allStar()[1].data

    # version string for the FITS headers: fetched lazily, once, instead of
    # running the external command for every star
    idlwrap_version = None

    # loop over fields in planfile
    for field in p['ASPCAP']['field']:
        metadatas = []
        fluxes = []
        ivars = []
        output_filenames = []
        apogee_names = []
        failures = 0

        # get file names to fit
        try:
            paths = getfiles(apred, apstar, aspcap, results, cannon, field)
        except Exception:
            # same outcome as before (stop processing), but no longer silent
            logger.exception(
                "Could not get file list for field {}".format(field))
            return

        spectrum_filenames = []
        initial_labels = []
        apogee_ids = []
        for apogee_id, inpath, outpath in paths:
            # only take stars within certain parameter ranges
            print(apogee_id)
            #j=apselect.select(allstar,redid=apogee_id)[0]
            j = np.where(((allstar['REDUCTION_ID'] == apogee_id)
                          | (allstar['APOGEE_ID'] == apogee_id))
                         & (allstar['COMMISS'] == 0))[0]
            if (len(j) == 0):
                print('missing target', apogee_id)
            else:
                if len(j) > 1:
                    j = j[0]
                if ((allstar['FPARAM'][j, 1] >= logg[0]) &
                    (allstar['FPARAM'][j, 1] <= logg[1]) &
                    (allstar['FPARAM'][j, 0] >= teff[0]) &
                    (allstar['FPARAM'][j, 0] <= teff[1]) &
                    (allstar['FPARAM'][j, 3] >= mh[0]) &
                    (allstar['FPARAM'][j, 3] <= mh[1]) &
                    (allstar['FPARAM'][j, 6] >= alpha[0]) &
                    (allstar['FPARAM'][j, 6] <= alpha[1])):
                    spectrum_filenames.append(outpath)
                    apogee_names.append(apogee_id)
                    #labels=[]
                    #for i,label in enumerate(label_names) :
                    #    if allstar[label][j][0] > -9 :
                    #        labels.append(allstar[label][j][0])
                    #    else :
                    #        labels.append(mean_labels[i])
                    #initial_labels.append(labels)

        # NOTE(review): this ends the whole run, not just this field, when a
        # field has no star in range -- confirm that 'continue' was not meant
        if len(apogee_names) == 0:
            return
        #initial_labels=np.array(initial_labels)
        initial_labels = mean_labels

        # MAGIC HACK
        delete_meta_keys = ("fjac", )  # To save space...

        #output_suffix = kwargs.get("output_suffix", None)
        #output_suffix = "result" if output_suffix is None else str(output_suffix)
        summary_file = root + field + '/cannonField-' + os.path.basename(
            field) + '-' + output_suffix + '.fits'
        N = len(spectrum_filenames)
        for i, names in enumerate(zip(apogee_names, spectrum_filenames)):
            apogee_id = names[0]
            filename = names[1]
            logger.info("At spectrum {0}/{1}: {2}".format(i + 1, N, filename))

            basename, _ = os.path.splitext(filename)
            output_filename = "-".join([basename, output_suffix]) + ".pkl"

            if os.path.exists(output_filename) and not clobber:
                logger.info("Output filename {} already exists and not clobbering."\
                    .format(output_filename))
                continue

            # NOTE: pickle.load executes arbitrary code from the file; only
            # pipeline-produced files must be read here.
            try:
                with open(filename, "rb") as fp:
                    metadata, data = pickle.load(fp)
                flux, ivar = data
            except Exception:
                logger.exception("Error occurred loading {}".format(filename))
                failures += 1
                continue

            # append to all parallel lists only after the whole load
            # succeeded, so that they can never get out of step
            metadatas.append(metadata)
            fluxes.append(flux)
            ivars.append(ivar)
            output_filenames.append(output_filename)
            apogee_ids.append(apogee_id)

            if len(output_filenames) >= chunk_size:
                # fit_results, not 'results': the latter is the version
                # string still needed by getfiles() for the next field
                fit_results, covs, metas = model.fit(
                    fluxes,
                    ivars,
                    initial_labels=initial_labels,
                    model_redshift=fit_velocity,
                    full_output=True)

                for result, cov, meta, output_filename \
                in zip(fit_results, covs, metas, output_filenames):

                    for key in delete_meta_keys:
                        if key in meta:
                            del meta[key]

                    with open(output_filename, "wb") as fp:
                        pickle.dump((result, cov, meta), fp,
                                    2)  # For legacy.
                    logger.info("Saved output to {}".format(output_filename))

                # clear every parallel list; leaving metadatas / apogee_ids
                # behind would pair later results with the wrong stars
                del (output_filenames[0:], fluxes[0:], ivars[0:],
                     metadatas[0:], apogee_ids[0:])

        data_dict = None
        if len(output_filenames) > 0:

            fit_results, covs, metas = model.fit(fluxes,
                                                 ivars,
                                                 initial_labels=initial_labels,
                                                 model_redshift=fit_velocity,
                                                 full_output=True)

            # Create an ordered dictionary of lists for all the data.
            data_dict = OrderedDict([("FILENAME", [])])
            data_dict['APOGEE_ID'] = []
            data_dict['LOCATION_ID'] = []
            data_dict['FIELD'] = []
            for label_name in label_names:
                data_dict[label_name] = []
            for label_name in label_names:
                data_dict["{}_RAWERR".format(label_name)] = []
            for label_name in label_names:
                data_dict["{}_ERR".format(label_name)] = []
            #data_dict["COV"] = []
            #meta_keys=metas[0].keys()
            meta_keys = ['chi_sq', 'r_chi_sq', 'model_flux']
            for key in meta_keys:
                data_dict[key] = []
            data_dict['flux'] = []
            data_dict['ivar'] = []

            # loop over spectra, output individual files, and accumulate for summary file
            for result, cov, meta, output_filename,apogee_id,metadata,flux,ivar \
            in zip(fit_results, covs, metas, output_filenames, apogee_ids,metadatas,fluxes,ivars):

                if np.isfinite(result).all():
                    outlist = [
                        os.path.basename(output_filename), apogee_id,
                        metadata['LOCATION_ID'], metadata['FIELD']
                    ] + result.tolist()
                    try:
                        rawerr = np.diag(cov)**0.5
                        outlist.extend(rawerr)
                    except Exception:
                        # NOTE(review): interactive debugger trap kept from
                        # the original; not suitable for batch runs
                        pdb.set_trace()
                    # reported error: larger of the raw error and the label
                    # scatter read from the '.initial' file
                    outlist.extend(np.max([rawerr, sig_labels], axis=0))
                    #outlist.append(cov.tolist())
                    for key in delete_meta_keys:
                        if key in meta:
                            del meta[key]
                    #outlist += [meta.get(k, v) for k, v in meta.items()]
                    outlist += [meta.get(k) for k in meta_keys]
                    outlist.append(flux)
                    outlist.append(ivar)
                    for key, value in zip(data_dict.keys(), outlist):
                        data_dict[key].append(value)

                    # save to pkl file?
                    #with open(output_filename, "wb") as fp:
                    #    pickle.dump((result, cov, meta), fp, 2) # For legacy.
                    #logger.info("Saved output to {}".format(output_filename))

                    # save to FITS cannonStar file
                    if idlwrap_version is None:
                        # universal_newlines=True returns text, so that
                        # strip('\n') also works on Python 3
                        idlwrap_version = subprocess.check_output(
                            'idlwrap_version',
                            universal_newlines=True).strip('\n')
                    hdr = fits.Header()
                    hdr['HISTORY'] = 'IDLWRAP_VERSION: ' + idlwrap_version
                    hdr['OBJ'] = apogee_id
                    hdr['LOCID'] = metadata['LOCATION_ID']
                    hdr['FIELD'] = metadata['FIELD']
                    hdr['CHI2'] = meta.get('r_chi_sq')
                    for i, label_name in enumerate(label_names):
                        hdr[label_name] = result[i]
                    hdulist = fits.HDUList(fits.PrimaryHDU(header=hdr))
                    hdr = fits.Header()
                    hdr['OBSERVER'] = 'Edwin Hubble'
                    hdr['CRVAL1'] = 4.179e0
                    hdr['CDELT1'] = 6.e-6
                    hdr['CRPIX1'] = 1
                    hdr['CTYPE1'] = 'LOG-LINEAR'
                    hdr['DC-FLAG'] = 1
                    hdulist.append(fits.ImageHDU(flux, header=hdr))
                    hdulist.append(
                        fits.ImageHDU(1. / np.sqrt(ivar), header=hdr))
                    hdulist.append(
                        fits.ImageHDU(meta.get('model_flux'), header=hdr))
                    hdulist.writeto(output_filename.replace(
                        '-result', '').replace('.pkl', '.fits'),
                                    overwrite=True)

            del output_filenames[0:], fluxes[0:], ivars[0:]

        logger.info("Number of failures: {}".format(failures))
        logger.info("Number of successes: {}".format(N - failures))

        if data_dict is None:
            # nothing was fit for this field: do not write a summary built
            # from undefined or stale data
            logger.warning(
                "No spectra fit for field {}; no summary written".format(
                    field))
            continue

        table = Table(TableColumns(data_dict))
        table.write(summary_file.replace('-result', ''), overwrite=clobber)
        logger.info("Written to {}".format(summary_file))

    return None
def train(planfile,
          skip=1,
          threads=8,
          xh=None,
          model_name=None,
          censor=None,
          sim=False,
          gb=None,
          mh=None):
    '''
    Define training set and train Cannon.

    Selects training stars from the allStar file, writes them to
    ``<model_name>-training-set.fits``, reads their spectra, trains an
    L1-regularized Cannon model, saves it (``.model`` and ``.model.full``),
    writes the mean labels and the 1-to-1 offset/scatter to
    ``<model_name>.initial`` and makes 1-to-1 plots under ``plots/``.

    Parameters
    ----------
    planfile : str
        Yanny plan file; supplies versions, model settings and the
        logg/teff/mh/alpha selection ranges.
    skip : int
        Stride applied when reading the training set back in.
    threads : int
        Default thread count; replaced by the plan's ``ncpus`` if present.
    xh : optional
        If true, model labels are [X/H] rather than [X/Fe]; defaults to the
        plan's ``xh``.
    model_name : str, optional
        Defaults to the plan value or ``'apogee-dr14-giants'``.
    censor : optional
        If true, per-element censoring masks are installed; defaults to the
        plan's ``censor``.
    sim : optional
        If true, ``allStar.fits`` in the current directory is used and
        ``sim`` itself replaces both label lists.
    gb : optional
        Giant-branch selection width; defaults to the plan's ``gb``.
    mh : optional
        [M/H] range; defaults to the plan's ``mh``.
    '''
    p = yanny.yanny(planfile, np=True)
    apred = p['apred_vers'].strip("'")
    apstar = getval(p, 'apstar_vers', 'stars').strip("'")
    aspcap_vers = getval(p, 'aspcap_vers', 'aspcap').strip("'")
    results = getval(p, 'results_vers', 'results').strip("'")
    cannon = getval(p, 'cannon_vers', 'cannon_aspcap')
    if model_name is None:
        model_name = getval(p, 'model_name', 'apogee-dr14-giants')
    model_order = int(getval(p, 'model_order', '2'))
    model_scale_factor = float(getval(p, 'model_scale_factor', '1.0'))
    model_regularization = float(getval(p, 'model_regularization', '0.0'))
    threads = int(getval(p, 'ncpus', threads))
    if xh is None:
        xh = int(getval(p, 'xh', False))
    if censor is None:
        censor = int(getval(p, 'censor', False))
    logg = getrange(getval(p, 'logg', '-1 3.9'))
    teff = getrange(getval(p, 'teff', '3500 5500'))
    if gb is None:
        gb = getval(p, 'gb', 0)
    if mh is None:
        mh = getrange(getval(p, 'mh', '-3. 1.'))
    alpha = getrange(getval(p, 'alpha', '-0.5 1.'))

    # label names
    elems = aspcap.elems()[0]
    #model_labels = ['TEFF','LOGG','M_H']
    model_labels = ['TEFF', 'LOGG', 'M_H', 'ALPHA_M', 'FE_H']
    input_labels = ['TEFF', 'LOGG', 'M_H', 'ALPHA_M', 'FE_H']
    for el in elems:
        d = elem.dr14cal(el)
        # '!=' rather than 'is not': identity comparison against a string
        # literal is implementation-dependent
        if el != 'Fe' and d['elemfit'] >= 0:
            if xh:
                model_labels.append(el.upper() + '_H')
            else:
                model_labels.append(el.upper() + '_FE')
            input_labels.append(el.upper() + '_FE')

    apl = apload.ApLoad(apred=apred,
                        apstar=apstar,
                        aspcap=aspcap_vers,
                        results=results)
    if sim:
        allstar = fits.open('allStar.fits')[1].data
        gd = apselect.select(allstar,
                             logg=logg,
                             teff=teff,
                             mh=mh,
                             alpha=alpha,
                             sn=[100, 10000])
        model_labels = ['TEFF', 'LOGG', 'M_H']
        input_labels = ['TEFF', 'LOGG', 'M_H']
        # the value of sim overrides the two lists set just above
        model_labels = sim
        input_labels = sim
        if gb:
            # keep stars within gb of a straight line in the Teff/log g plane
            gd2 = np.where(
                np.abs((allstar['TEFF'][gd] - 3500) * 4 / 2000. -
                       allstar['LOGG'][gd]) < float(gb))[0]
            gd = gd[gd2]
    else:
        allstar = apl.allStar()[1].data
        gd = apselect.select(
            allstar,
            badval=['STAR_BAD'],
            sn=[100, 10000],
            logg=logg,
            teff=teff,
            mh=mh,
            alpha=alpha,
            badstar=['PERSIST_HIGH', 'PERSIST_MED', 'PERSIST_LOW'],
            gb=gb)
        # drop stars listed in the cluster file with pop != 1
        gcstars = ascii.read(os.environ['IDLWRAP_DIR'] +
                             '/data/gc_szabolcs.dat')
        bd = np.where(gcstars['pop'] != 1)[0]
        jc = [
            x for x in gd
            if allstar[x]['APOGEE_ID'] not in gcstars['id'][bd]
        ]
        gd = jc

    # down select stars using HR+[M/H] sampling
    i1, i2 = cal.hrsample(allstar, allstar[gd], raw=False)

    # make sure all labels are good
    gd = []
    for i in i1:
        good = True
        for label in input_labels:
            # special handling for NA in DR14
            if label == 'NA_FE' and allstar[label][i] < -5 and allstar[
                    'FE_H'][i] < -1:
                allstar[label][i] = 0.
            if allstar[label][i] < -5:
                good = False
                print('reject', allstar['APOGEE_ID'][i], label,
                      allstar[label][i])
                break
        if good:
            gd.append(i)

    print('selected ', len(gd), ' training set stars')
    root = os.environ['APOGEE_ASPCAP'] + '/' + apred + '/' + cannon + '/'
    training_set = os.path.join(root,
                                "{}-training-set.fits".format(model_name))
    if not os.path.exists(os.path.dirname(training_set)):
        os.makedirs(os.path.dirname(training_set))
    struct.wrfits(np.array(allstar[gd]), training_set)

    # The label names to use in the model.
    model_filename = os.path.join(root, "{}.model".format(model_name))
    initial_filename = os.path.join(root, "{}.initial".format(model_name))

    clobber_model = True
    # NOTE(review): the slice stops before the last row, so the final
    # training star is always dropped -- confirm that this is intended
    labelled_set = Table.read(training_set)[0:-1:skip]
    N_labelled = len(labelled_set)
    if xh:
        for el in elems:
            d = elem.dr14cal(el)
            if el != 'Fe' and d['elemfit'] >= 0:
                labelled_set[el.upper() +
                             '_H'] = labelled_set[el.upper() +
                                                  '_FE'] + labelled_set['FE_H']

    # TODO: something's wrong with our dispersion that we extracted.
    #with open(os.path.join(CANNON_DATA_DIR, "dispersion.pkl"), "rb") as fp:
    #    dispersion = pickle.load(fp)
    #P = dispersion.size
    dispersion = None
    P = 8575  # MAGIC

    # These defaults (flux = 1, ivar = 0) will mean that even if we don't find a
    # spectrum for a single star in the training set, then that star will just have
    # no influence on the training (since ivar = 0 implies infinite error on flux).
    normalized_flux = np.ones((N_labelled, P), dtype=float)
    normalized_ivar = np.zeros((N_labelled, P), dtype=float)

    # Enable logging.
    logger = logging.getLogger("apogee.dr14.tc")
    logger.setLevel(logging.INFO)

    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter("%(asctime)s [%(levelname)-8s] %(message)s"))
    logger.addHandler(handler)

    sdss_path = path.Path()
    for i, row in enumerate(labelled_set):

        logger.info("Reading labelled set spectra ({}/{})".format(
            i + 1, N_labelled))
        if row['TELESCOPE'] == 'apo1m':
            filename = sdss_path.full('cannonStar-1m',
                                      apred=apred,
                                      apstar=apstar,
                                      aspcap=aspcap_vers,
                                      results=results,
                                      cannon=cannon,
                                      field=row['FIELD'],
                                      reduction=row['REDUCTION_ID'],
                                      telescope=row['TELESCOPE'])
        else:
            filename = sdss_path.full('cannonStar',
                                      apred=apred,
                                      apstar=apstar,
                                      aspcap=aspcap_vers,
                                      results=results,
                                      cannon=cannon,
                                      field=row['FIELD'],
                                      obj=row['APOGEE_ID'],
                                      telescope=row['TELESCOPE'])

        if not os.path.exists(filename):
            # logger.warn is a deprecated alias of logger.warning
            logger.warning("Could not find filename for labelled set star {}: {}"\
                .format(row["APOGEE_ID"], filename))
            continue

        # NOTE: pickle.load executes arbitrary code from the file; only
        # pipeline-produced files must be read here.
        with open(filename, "rb") as fp:
            #flux, ivar = pickle.load(fp)
            metadata, data = pickle.load(fp)
            flux, ivar = data
        if (np.isfinite(flux).all()) & (np.isfinite(ivar).all()):
            normalized_flux[i, :] = flux
            normalized_ivar[i, :] = ivar
        else:
            # zero ivar removes the star from the training (see 'keep' below)
            print('non-finite values in', row['APOGEE_ID'])
            normalized_flux[i, :] = 0.
            normalized_ivar[i, :] = 0.
            #pdb.set_trace()

    # TODO: Cache the normalized_flux and normalized_ivar into a single file so that
    #       it is faster to read in next time?
    assert  np.isfinite(normalized_flux).all(), \
            "Non-finite values in normalized_flux!"
    assert  np.isfinite(normalized_ivar).all(), \
            "Non-finite values in normalized_ivar!"

    # Exclude labelled set stars where there is no spectrum, only because it
    # will get annoying later on when we are doing 1-to-1 and cross-validation
    keep = np.any(normalized_ivar > 0, axis=1)
    if not np.all(keep):
        logger.info(
            "Excluding {} labelled set stars where there was no information in "
            "the spectrum".format(np.sum(~keep)))
        labelled_set = labelled_set[keep]
        normalized_flux = normalized_flux[keep]
        normalized_ivar = normalized_ivar[keep]

    # Construct and train a model.
    #
    model = tc.L1RegularizedCannonModel(labelled_set,
                                        normalized_flux,
                                        normalized_ivar,
                                        dispersion,
                                        threads=threads)

    model.vectorizer = tc.vectorizer.NormalizedPolynomialVectorizer(
        labelled_set,
        tc.vectorizer.polynomial.terminator(model_labels, model_order),
        scale_factor=model_scale_factor)

    if censor:
        for label in model_labels:
            for el in elems:
                d = elem.dr14cal(el)
                if xh:
                    lab = el.upper() + '_H'
                else:
                    lab = el.upper() + '_FE'
                if lab == label:
                    model.censors[label] = getcensor(
                        el,
                        maskdir=os.environ['SPECLIB_DIR'] +
                        '/lib/filters_26042016/',
                        length=P)
                    print(
                        label,
                        getcensor(el,
                                  maskdir=os.environ['SPECLIB_DIR'] +
                                  '/lib/filters_26042016/'))

    model.s2 = 0
    model.regularization = model_regularization
    model.train()
    model._set_s2_by_hogg_heuristic()

    model.save(model_filename,
               include_training_data=False,
               overwrite=clobber_model)
    model.save(model_filename + '.full',
               include_training_data=True,
               overwrite=clobber_model)

    # Make some 1-to-1 plots just to show sensible behaviour.
    #X = model.labels_array()
    X = model.labels_array
    Y = model.fit(model.normalized_flux, model.normalized_ivar)
    # columns: mean label, mean offset, scatter, label name (fit() reads
    # columns 0 and 2 of this file)
    out = Table(
        np.transpose([
            np.mean(model.labels_array, axis=0),
            np.nanmean(Y - X, axis=0),
            np.nanstd(Y - X, axis=0),
            np.array(model_labels)
        ]))
    out.write(initial_filename, overwrite=True, format='ascii')
    #np.savetxt(initial_filename, [np.mean(model.labels_array, axis=0).reshape(-1, 1), np.nanmean(Y-X,axis=0), np.nanstd(Y-X,axis=0), model_labels] )

    try:
        os.makedirs(os.path.join(root, 'plots'))
    except OSError:
        # typically: the directory already exists
        pass

    it = model_labels.index('TEFF')
    ig = model_labels.index('LOGG')
    iz = model_labels.index('M_H')

    def plotit(ax, x, y, z, label):
        # inferred-minus-labelled against the labelled value, colored by z
        plots.plotc(ax, x, y - x, z, xt=label, yt='inferred-labelled')
        lims = ax.get_xlim()
        ax.plot(lims, [0., 0.], c="#666666", zorder=-1, linestyle=":")
        mean, rms = np.nanmean(y - x), np.nanstd(y - x)
        title = "{}: ({:.2f}, {:.2f})".format(label, mean, rms)
        ax.set_title(title)

    fig, ax = plots.multi(2, 3)
    plotit(ax[0, 0], X[:, it], Y[:, it], X[:, iz], 'TEFF')
    plotit(ax[0, 1], X[:, ig], Y[:, ig], X[:, it], 'LOGG')
    plotit(ax[1, 0], X[:, iz], Y[:, iz], X[:, it], 'M_H')
    plots.plotc(ax[1, 1],
                X[:, it],
                X[:, ig],
                X[:, iz],
                xr=[6000, 3500],
                yr=[5, -0.5],
                xt='TEFF',
                yt='LOGG')
    gd = np.where(model.normalized_ivar.flatten() > 1)[0]
    diff = np.abs(model.normalized_flux - model.predict(Y))
    # 'density' replaces the 'normed' keyword, which current matplotlib no
    # longer accepts
    ax[2, 0].hist(diff.flatten()[gd],
                  cumulative=True,
                  density=True,
                  bins=10.**np.arange(-8, 0, 0.05),
                  histtype='step')
    ax[2, 0].set_xlabel('|Model-true|')
    ax[2, 0].set_ylim(0., 1.)
    ax[2, 0].set_xscale('log')
    figure_path = os.path.join(root, "plots/{}-1to1.png".format(model_name))
    fig.tight_layout()
    fig.savefig(figure_path, dpi=300)
    plt.close()

    # one inferred-versus-labelled figure per model label
    for i, label_name in enumerate(model_labels):

        x = X[:, i]
        y = Y[:, i]

        fig, ax = plt.subplots()
        if label_name == 'TEFF':
            ax.scatter(x, y, c=X[:, iz], alpha=0.5)
        else:
            ax.scatter(x, y, c=X[:, it], alpha=0.5)

        # common square limits with a 1:1 line
        lims = np.array([ax.get_xlim(), ax.get_ylim()])
        lims = (lims.min(), lims.max())
        ax.plot(lims, lims, c="#666666", zorder=-1, linestyle=":")
        ax.set_xlim(lims)
        ax.set_ylim(lims)

        ax.set_xlabel("Labelled")
        ax.set_ylabel("Inferred")

        mean, rms = np.nanmean(y - x), np.nanstd(y - x)
        title = "{}: ({:.2f}, {:.2f})".format(label_name, mean, rms)
        ax.set_title(title)
        logger.info("Mean and RMS for {}".format(title))

        figure_path = os.path.join(
            root, "plots/{}-{}-1to1.png".format(model_name, label_name))
        fig.tight_layout()
        fig.savefig(figure_path, dpi=300)
        plt.close()

        logger.info("Created 1-to-1 figure for {} at {}".format(
            label_name, figure_path))
def compare(planfile,
            model_name=None,
            outfile=None,
            xh=False,
            output_suffix=''):
    '''
    Make some plots comparing Cannon results with the allStar results.

    Two sets of output are produced:

    1. For each element that passes the ``elemfit`` check, one PDF per
       500 K TEFF bin (3500-5500) saved as
       ``newplots/<el>_<tmid><output_suffix>.pdf``. Each figure has one
       column per S/N bin (50-250 in steps of 50); the top row plots the
       allStar abundance against FE_H and the bottom row plots the Cannon
       abundance against FE_H. A summary table is written through
       ``html.htmltab`` to ``plots/cannon<output_suffix>.html``.
    2. For every model label, a plot of the Cannon value against the
       allStar value saved as
       ``newplots/fullcomp_<label><output_suffix>.jpg``, plus a summary
       table ``newplots/compare<output_suffix>.html`` that also references
       ``<model_name>-<label>-1to1.png``.

    Parameters
    ----------
    planfile : str
        Plan file read with ``yanny.yanny``; must provide ``apred_vers``,
        ``apstar_vers``, ``aspcap_vers`` and ``results_vers``.
    model_name : str, optional
        Name used to build the 1-to-1 figure file names referenced in the
        comparison table. If None, it is looked up in the plan file via
        ``getval`` with the default ``'apogee-dr14-giants'``.
    outfile : str, optional
        FITS file holding the Cannon results (HDU 1 is read). Defaults to
        ``'allStarCannon-<results_vers>.fits'``.
    xh : bool, optional
        If True the Cannon element labels are read as ``<EL>_H`` and FE_H
        is subtracted before plotting; otherwise ``<EL>_FE`` is used
        directly.
    output_suffix : str, optional
        String appended to every output file name.

    Returns
    -------
    None
    '''
    p = yanny.yanny(planfile, np=True)
    apred = p['apred_vers'].strip("'")
    apstar = p['apstar_vers'].strip("'")
    aspcap_vers = p['aspcap_vers'].strip("'")
    results = p['results_vers'].strip("'")
    if model_name is None:
        model_name = getval(p, 'model_name', 'apogee-dr14-giants')

    apl = apload.ApLoad(apred=apred,
                        apstar=apstar,
                        aspcap=aspcap_vers,
                        results=results)
    a = apl.allStar()[1].data
    if outfile is None:
        outfile = 'allStarCannon-' + results + '.fits'
    c = fits.open(outfile)[1].data

    elems = aspcap.elems()[0]
    # NOTE(review): the full element list is immediately overridden by this
    # hard-coded pair -- looks like a leftover debugging restriction; confirm
    # before removing.
    elems = ['Ca', 'Ni']
    model_labels = ['TEFF', 'LOGG', 'M_H', 'ALPHA_M', 'FE_H']

    figs = []
    ytit = []
    # Bound up front so the htmltab call below does not hit an unbound local
    # when no element passes the selection.
    xtit = []
    for el in elems:
        d = elem.dr14cal(el)
        # String comparison must use equality, not identity.
        if el != 'Fe' and d['elemfit'] >= 0:
            print(el)
            tag = el.upper() + '_FE'
            if xh:
                ctag = el.upper() + '_H'
            else:
                ctag = el.upper() + '_FE'
            model_labels.append(ctag)
            f = []
            # Column titles are rebuilt per element; they are identical for
            # every element because the TEFF bins do not change.
            xtit = []
            for tmin in range(3500, 5500, 500):
                fig, ax = plots.multi(4,
                                      2,
                                      hspace=0.001,
                                      wspace=0.001,
                                      figsize=(10, 4.5))
                for i, snmin in enumerate(range(50, 250, 50)):
                    #j = apselect.select(a,badval='STAR_BAD',sn=[snmin,100000],teff=[tmin,tmin+500],logg=[-1,3.9],mh=[-3.,1],alpha=[-0.5,1.],raw=True)
                    # Stars not flagged STAR_BAD, inside this Cannon-TEFF bin
                    # and this allStar S/N bin.
                    j = np.where(
                        (np.char.find(a['ASPCAPFLAGS'], 'STAR_BAD') < 0)
                        & (c['TEFF'] > tmin) & (c['TEFF'] < tmin + 500)
                        & (a['SNR'] > snmin) & (a['SNR'] < snmin + 50))[0]
                    # Only the left-most column carries a y-axis title.
                    if i == 0:
                        yt = '[' + el + '/Fe]'
                    else:
                        yt = None
                    ax1 = plots.plotc(ax[0, i],
                                      a['FE_H'][j],
                                      a[tag][j],
                                      a['ASPCAP_CHI2'][j],
                                      xr=[-2.5, 1],
                                      yr=[-0.5, 0.75],
                                      zr=[0, 10],
                                      xt='[Fe/H]',
                                      yt=yt,
                                      nxtick=6,
                                      rasterized=True)
                    ax[0, i].text(0.1,
                                  0.9,
                                  'ASPCAP:',
                                  transform=ax[0, i].transAxes)
                    ax[0, i].text(0.2,
                                  0.8,
                                  'S/N > {:d}'.format(snmin),
                                  transform=ax[0, i].transAxes)
                    if xh:
                        # Cannon label is [X/H]; convert to [X/Fe] for the
                        # plot by subtracting the Cannon FE_H.
                        ax2 = plots.plotc(ax[1, i],
                                          c['FE_H'][j],
                                          c[ctag][j] - c['FE_H'][j],
                                          c['r_chi_sq'][j],
                                          xr=[-2.5, 1],
                                          yr=[-0.5, 0.75],
                                          zr=[0, 5],
                                          xt='[Fe/H]',
                                          yt=yt,
                                          nxtick=6,
                                          rasterized=True)
                    else:
                        ax2 = plots.plotc(ax[1, i],
                                          c['FE_H'][j],
                                          c[ctag][j],
                                          c['r_chi_sq'][j],
                                          xr=[-2.5, 1],
                                          yr=[-0.5, 0.75],
                                          zr=[0, 5],
                                          xt='[Fe/H]',
                                          yt=yt,
                                          nxtick=6,
                                          rasterized=True)
                    ax[1, i].text(0.1,
                                  0.9,
                                  'Cannon:',
                                  transform=ax[1, i].transAxes)
                    ax[1, i].text(0.2,
                                  0.8,
                                  '{:d} < S/N < {:d}'.format(
                                      snmin, snmin + 50),
                                  transform=ax[1, i].transAxes)

                figname = '{:s}_{:04d}{:s}'.format(el, tmin + 250,
                                                   output_suffix)
                # One colorbar per row, taken from the last panel drawn.
                cbaxes = fig.add_axes([0.91, 0.55, 0.01, 0.3])
                cb = plt.colorbar(ax1, cax=cbaxes)
                cb.set_label('CHI2')
                cbaxes.tick_params(axis='both', labelsize=8)
                cbaxes = fig.add_axes([0.91, 0.15, 0.01, 0.3])
                cb = plt.colorbar(ax2, cax=cbaxes)
                cb.set_label('CHI2')
                cbaxes.tick_params(axis='both', labelsize=8)
                #fig.savefig('newplots/'+file+'.jpg',dpi=300)
                fig.savefig('newplots/' + figname + '.pdf')
                plt.close()
                f.append(figname)
                xtit.append('{:04d} < Teff < {:04d}'.format(tmin, tmin + 500))
            figs.append(f)
            ytit.append(el)
    # NOTE(review): this table is written under 'plots/' while the figures it
    # lists are saved under 'newplots/' -- confirm this is intentional.
    html.htmltab(figs,
                 file='plots/cannon' + output_suffix + '.html',
                 ytitle=ytit,
                 xtitle=xtit)

    figs = []
    ytit = []
    for label in model_labels:
        print(label)
        fig, ax = plots.multi(1, 1, figsize=(12, 8))
        try:
            gd = np.where((a[label] > -999) & (a[label] < 999)
                          & (c[label] > -999))[0]
            plots.plotc(ax,
                        a[label][gd],
                        c[label][gd],
                        a['M_H'][gd],
                        zr=[-2, 0.5],
                        colorbar=False,
                        xt=label)
        except (KeyError, ValueError):
            # The label is not a column of the allStar table: fall back to
            # the corresponding <EL>_FE column and add FE_H to it.
            alabel = label.replace('_H', '_FE')
            print(alabel)
            gd = np.where((a[alabel] > -999) & (a[alabel] < 999)
                          & (c[label] > -999))[0]
            plots.plotc(ax,
                        a[alabel][gd] + a['FE_H'][gd],
                        c[label][gd],
                        a['M_H'][gd],
                        zr=[-2, 0.5],
                        colorbar=False,
                        xt=label)
        figname = 'fullcomp_' + label + output_suffix + '.jpg'
        fig.tight_layout()
        fig.savefig('newplots/' + figname, dpi=300)
        plt.close()
        f = [figname, model_name + '-' + label + '-1to1.png']
        figs.append(f)
        ytit.append(label)
    html.htmltab(figs,
                 file='newplots/compare' + output_suffix + '.html',
                 ytitle=ytit)