def test_step(date):
    """Run the Cannon test step for one observation date.

    Loads the test spectra and LAMOST labels for *date*, fits every
    object from four starting guesses, keeps the lowest-chi-squared fit
    per object, saves the results to ./<date>_*.npz, and makes
    diagnostic plots.
    """
    # NOTE(review): wavelength grid comes from run_2 but the model files
    # below come from run_5 — confirm the two runs share the same grid.
    wl = np.load("../run_2_train_on_good/wl.npz")['arr_0']
    test_ID = np.load("%s_test_ids.npz" %date)['arr_0']
    test_flux = np.load("%s_test_flux.npz" %date)['arr_0']
    test_ivar = np.load("%s_test_ivar.npz" %date)['arr_0']
    nlabels = 4
    nobj = len(test_ID)
    lamost_label_3 = np.load("%s_lamost_label.npz" %date)['arr_0']
    # add extra column to make it symmetric with the inferred test labels
    toadd = np.ones(nobj)[...,None]
    lamost_label = np.hstack((lamost_label_3, toadd))
    # Only two rows of flux/ivar are passed as the "training" spectra:
    # the training set is unused in the test step, this just satisfies
    # the Dataset constructor.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2,:], test_ivar[0:2,:],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
    # Rehydrate the trained model from its saved components.
    m = model.CannonModel(2)
    m.coeffs = np.load("../run_5_train_on_good/coeffs.npz")['arr_0']
    m.scatters = np.load("../run_5_train_on_good/scatters.npz")['arr_0']
    m.chisqs = np.load("../run_5_train_on_good/chisqs.npz")['arr_0']
    m.pivots = np.load("../run_5_train_on_good/pivots.npz")['arr_0']
    # Four starting guesses at the corners of label space
    # (hi/lo Teff+logg crossed with hi/lo metallicity).
    nguesses = 4
    starting_guesses = np.zeros((nguesses,nlabels))
    hiT_hiG_hiM = np.array([ 5.15273730e+03, 3.71762228e+00, 3.16861898e-01, 2.46907920e-02])
    hiT_hiG_loM = np.array([ 5.16350098e+03, 3.45917511e+00, -9.24426436e-01, 2.49296919e-01])
    loT_loG_hiM = np.array([ 4.04936841e+03, 1.47109437e+00, 2.07210138e-01, 1.49733415e-02])
    loT_loG_loM = np.array([ 4.00651318e+03, 8.35013509e-01, -8.98257852e-01, 7.65705928e-02])
    # Guesses are expressed as offsets from the model pivots.
    starting_guesses[0,:] = hiT_hiG_hiM-m.pivots
    starting_guesses[1,:] = hiT_hiG_loM-m.pivots
    starting_guesses[2,:] = loT_loG_loM-m.pivots
    starting_guesses[3,:] = loT_loG_hiM-m.pivots
    labels = np.zeros((nguesses, nobj, nlabels)) # 4,10955,4
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    # Fit every object once per starting guess.
    for ii,guess in enumerate(starting_guesses):
        a,b,c = test_step_iteration(ds,m,starting_guesses[ii])
        labels[ii,:] = a
        chisq[ii,:] = b
        errs[ii,:] = c
    # For each object, keep the guess with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj,val in enumerate(choose):
        best_labels[jj,:] = labels[:,jj,:][val]
        best_errs[jj,:] = errs[:,jj,:][val]
    np.savez("./%s_all_cannon_labels.npz" %date, best_labels)
    np.savez("./%s_cannon_label_chisq.npz" %date, best_chisq)
    np.savez("./%s_cannon_label_errs.npz" %date, best_errs)
    # Diagnostic plots of the winning labels against the LAMOST labels.
    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" %date)
    ds.diagnostics_1to1(figname = "%s_1to1_test_label.png" %date)
def test_step(date):
    """Run the Cannon test step for one observation date.

    Loads the normalized spectra for *date* from ../xcalib_4labels, fits
    every object from seven starting guesses drawn at random from the
    APOGEE reference labels, keeps the lowest-chi-squared fit per object,
    and writes results plus a 1-to-1 diagnostic plot under ./output.
    """
    direc = "../xcalib_4labels"
    wl = np.load("%s/wl.npz" % direc)['arr_0']
    test_ID = np.load("%s/output/%s_ids.npz" % (direc, date))['arr_0']
    print(str(len(test_ID)) + " objects")
    # Normalized flux and inverse variance live in the same npz file.
    test_flux = np.load("%s/output/%s_norm.npz" % (direc, date))['arr_0']
    test_ivar = np.load("%s/output/%s_norm.npz" % (direc, date))['arr_1']
    lamost_label = np.load("%s/output/%s_tr_label.npz" % (direc, date))['arr_0']
    apogee_label = np.load("./tr_label.npz")['arr_0']
    # Only two rows of "training" spectra are needed — the training set
    # is unused in the test step, this just satisfies the constructor.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(
        ['T_{eff}', '\log g', '[Fe/H]', '[\\alpha/Fe]', 'log M', 'A_k'])
    # Rehydrate the trained model from its saved components.
    m = model.CannonModel(2)
    m.coeffs = np.load("./coeffs.npz")['arr_0']
    m.scatters = np.load("./scatters.npz")['arr_0']
    m.chisqs = np.load("./chisqs.npz")['arr_0']
    m.pivots = np.load("./pivots.npz")['arr_0']
    nlabels = len(m.pivots)
    nobj = len(test_ID)
    nguesses = 7
    # Random reference stars provide the starting guesses, expressed as
    # offsets from the model pivots.
    choose = np.random.randint(0, apogee_label.shape[0], size=nguesses)
    starting_guesses = apogee_label[choose, :] - m.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, guess)
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c
    np.savez("output/%s_cannon_label_guesses.npz" % date, labels)
    # FIX: this file previously saved `labels` again; it now saves the
    # per-guess chi-squared values its filename promises.
    np.savez("output/%s_cannon_chisq_guesses.npz" % date, chisq)
    # For each object, keep the guess with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]
    np.savez("output/%s_all_cannon_labels.npz" % date, best_labels)
    np.savez("output/%s_cannon_label_chisq.npz" % date, best_chisq)
    np.savez("output/%s_cannon_label_errs.npz" % date, best_errs)
    ds.test_label_vals = best_labels
    #ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" %date)
    # Plot only the first three labels (Teff, logg, [M/H]).
    ds.test_label_vals = best_labels[:, 0:3]
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    ds.diagnostics_1to1(figname="%s_1to1_test_label" % date)
def __init__(self, order):
    """Initialize the wrapper with the polynomial *order* of the Cannon model.

    :param order: polynomial order passed through to model.CannonModel.
    """
    # NOTE(review): "m_oder" looks like a typo for "m_order" — left
    # unchanged because other methods (not visible here) may read it.
    self.m_oder = order
    # Dataset slots start empty; presumably filled by a later setup/training
    # step — confirm against the rest of the class.
    self.m_train_dataset_newtype = None
    self.m_train_dataset = None
    # Underlying Cannon model, error propagation disabled.
    self.m_model = model.CannonModel(order, useErrors=False)
def train():
    """Train a quadratic Cannon model on the reference set and save its
    components (coeffs, scatters, chisqs, pivots) to the current directory.

    Only the first three reference labels (Teff, logg, [Fe/H]) are used.
    """
    wl = np.load("%s/../wl_cols.npz" % SPEC_DIR)['arr_0']
    tr_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    tr_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0']
    # Keep only the first three label columns.
    tr_label = tr_label[:, 0:3]
    tr_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    tr_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']
    # The same spectra serve as both training and "test" set here.
    ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label,
                         tr_id, tr_flux, tr_ivar)
    # teff, logg, mh, cm, nm, am, ak
    ds.set_label_names(['T_{eff}', '\log g', '[Fe/H]'])
    #, '[C/M]','[N/M]',
    #'[\\alpha/M]', 'A_k'])
    #ds.diagnostics_SNR()
    #ds.diagnostics_ref_labels()
    #np.savez("ref_snr.npz", ds.tr_SNR)
    print("Training model")
    nlab = ds.tr_label.shape[1]
    print(nlab)
    npix = len(ds.wl)
    print(npix)
    # Pixel filter: every pixel enabled for every label.
    filt = np.ones((nlab, npix), dtype=bool)
    print(filt)
    #filt[nlab-1,0:500] = 0
    m = model.CannonModel(2, wl_filter=filt)
    m.fit(ds)
    # Persist the trained model components.
    np.savez("./coeffs.npz", m.coeffs)
    np.savez("./scatters.npz", m.scatters)
    np.savez("./chisqs.npz", m.chisqs)
    np.savez("./pivots.npz", m.pivots)
    m.diagnostics_leading_coeffs(ds)
def test_step():
    """Label the reference objects with the trained "culled" Cannon model.

    Fits every reference spectrum from ten random starting guesses, keeps
    the lowest-chi-squared fit per object, saves the winning labels /
    chi-squared / errors, and makes diagnostic plots.
    """
    #wl = np.load("%s/wl.npz" %SPEC_DIR)['arr_0']
    wl = np.load("wl_cols.npz")['arr_0']
    test_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    tr_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0']
    test_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    test_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']
    # Reference spectra serve as both training and test set here.
    ds = dataset.Dataset(wl, test_id, test_flux, test_ivar, tr_label,
                         test_id, test_flux, test_ivar)
    ds.set_label_names([
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/M]', 'A_k'
    ])
    # Rehydrate the culled model from its saved components.
    m = model.CannonModel(2)
    m.coeffs = np.load("./culled_coeffs.npz")['arr_0']
    m.scatters = np.load("./culled_scatters.npz")['arr_0']
    m.chisqs = np.load("./culled_chisqs.npz")['arr_0']
    m.pivots = np.load("./culled_pivots.npz")['arr_0']
    nguesses = 10
    nobj = len(ds.test_ID)
    nlabels = len(m.pivots)
    # Random reference stars provide the starting guesses, expressed as
    # offsets from the model pivots.
    choose = np.random.randint(0, nobj, size=nguesses)
    starting_guesses = ds.tr_label[choose] - m.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, guess)
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c
    # For each object, keep the guess with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    # FIX: size by (nobj, nlabels) for consistency with the `labels`
    # array — previously this was np.zeros(tr_label.shape), which only
    # works when the label table has exactly nlabels columns.
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]
    np.savez("./cannon_label_vals.npz", best_labels)
    np.savez("./cannon_label_chisq.npz", best_chisq)
    np.savez("./cannon_label_errs.npz", best_errs)
    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels()
    ds.diagnostics_1to1(figname="1to1_test_label")
def load_model(ii):
    """Reconstruct a trained CannonModel from the ex<ii>_*.npz files on disk.

    :param ii: experiment index used in the saved file names.
    :return: a CannonModel with coeffs, scatters, chisqs, and pivots set.
    """
    print("Loading model")
    cannon = model.CannonModel(2)
    for attr in ("coeffs", "scatters", "chisqs", "pivots"):
        setattr(cannon, attr, np.load("./ex%s_%s.npz" % (ii, attr))['arr_0'])
    return cannon
def train(ds, leave_out):
    """Fit a quadratic Cannon model on *ds* for one cross-validation fold.

    Saves the model components to ./model_<leave_out>.npz and the
    leading-coefficients diagnostic figure, then returns the model.

    :param ds: dataset object holding the training spectra and labels.
    :param leave_out: index of the held-out fold, used in file names.
    :return: the fitted model.
    """
    print("TRAINING")
    cannon = model.CannonModel(2)
    cannon.fit(ds)
    np.savez("./model_%s.npz" % leave_out,
             cannon.coeffs, cannon.scatters, cannon.chisqs, cannon.pivots)
    fig = cannon.diagnostics_leading_coeffs(ds)
    plt.savefig("leading_coeffs_%s.png" % leave_out)
    plt.close()
    return cannon
def train(ds, ii):
    """Run the training step for experiment *ii* on dataset *ds*.

    Fits a quadratic Cannon model, saves each component to
    ./ex<ii>_<component>.npz, writes the leading-coefficients figure,
    and returns the fitted model.
    """
    print("Loading model")
    cannon = model.CannonModel(2)
    print("Training...")
    cannon.fit(ds)
    # Persist each model component under its own file.
    for attr in ("coeffs", "scatters", "chisqs", "pivots"):
        np.savez("./ex%s_%s.npz" % (ii, attr), getattr(cannon, attr))
    fig = cannon.diagnostics_leading_coeffs(ds)
    plt.savefig("ex%s_leading_coeffs.png" % ii)
    # Optional extra diagnostics:
    # cannon.diagnostics_leading_coeffs_triangle(ds)
    # cannon.diagnostics_plot_chisq(ds)
    return cannon
def load_model():
    """ Load the trained Cannon model from its fixed directory.

    Returns
    -------
    m: model object whose coeffs/scatters/chisqs are truncated to the
       first 3626 entries (the color columns are excluded).
    """
    direc = "/home/annaho/TheCannon/code/lamost/mass_age/cn"
    npix = 3626  # number of spectral pixels kept (color columns dropped)
    cannon = model.CannonModel(2)
    cannon.coeffs = np.load(direc + "/coeffs.npz")['arr_0'][0:npix, :]
    cannon.scatters = np.load(direc + "/scatters.npz")['arr_0'][0:npix]
    cannon.chisqs = np.load(direc + "/chisqs.npz")['arr_0'][0:npix]
    cannon.pivots = np.load(direc + "/pivots.npz")['arr_0']
    return cannon
def train():
    """Train a quadratic Cannon model on the normalized training spectra.

    Uses the first four label columns (Teff, logg, FeH, aFe), with a
    separate validation set as the Dataset's "test" spectra.  Saves SNR,
    diagnostic figures, and the model components under DATA_DIR.
    """
    wl = np.load("%s/wl.npz" %DATA_DIR)['arr_0']
    tr_id = np.load("%s/tr_id.npz" %DATA_DIR)['arr_0']
    tr_flux = np.load("%s/tr_flux_norm.npz" %DATA_DIR)['arr_0']
    tr_ivar = np.load("%s/tr_ivar_norm.npz" %DATA_DIR)['arr_0']
    tr_label = np.load("%s/tr_label.npz" %DATA_DIR)['arr_0']
    val_id = np.load("%s/val_id.npz" %DATA_DIR)['arr_0']
    val_flux = np.load("%s/val_flux_norm.npz" %DATA_DIR)['arr_0']
    val_ivar = np.load("%s/val_ivar_norm.npz" %DATA_DIR)['arr_0']
    # Only the first four label columns are used for training.
    ds = dataset.Dataset(
        wl, tr_id, tr_flux, tr_ivar, tr_label[:,0:4],
        val_id, val_flux, val_ivar)
    ds.set_label_names(["Teff", "logg", "FeH", 'aFe'])
    np.savez("%s/tr_SNR.npz" %DATA_DIR, ds.tr_SNR)
    # Pre-training diagnostics: SNR distribution and label triangle plot.
    fig = ds.diagnostics_SNR()
    plt.savefig("%s/SNR_dist.png" %DATA_DIR)
    plt.close()
    fig = ds.diagnostics_ref_labels()
    plt.savefig("%s/ref_label_triangle.png" %DATA_DIR)
    plt.close()
    md = model.CannonModel(2)
    md.fit(ds)
    fig = md.diagnostics_leading_coeffs(ds)
    plt.savefig("%s/leading_coeffs.png" %DATA_DIR)
    plt.close()
    # Persist the trained model components.
    np.savez("%s/coeffs.npz" %DATA_DIR, md.coeffs)
    np.savez("%s/scatters.npz" %DATA_DIR, md.scatters)
    np.savez("%s/chisqs.npz" %DATA_DIR, md.chisqs)
    np.savez("%s/pivots.npz" %DATA_DIR, md.pivots)
def train():
    """Train a quadratic Cannon model on the local training set.

    Uses the training spectra as their own "test" set, saves SNR and the
    model components (coeffs, scatters, chisqs, pivots) to the current
    directory, and produces the standard training diagnostics.
    """
    # Load training set
    wl = np.load("../data/wl.npz")['arr_0']
    tr_id = np.load("tr_id.npz")['arr_0']
    tr_label = np.load("tr_label.npz")['arr_0']
    tr_flux = np.load("tr_flux.npz")['arr_0']
    tr_ivar = np.load("tr_ivar.npz")['arr_0']
    # Same spectra serve as both training and "test" set.
    ds = dataset.Dataset(
        wl, tr_id, tr_flux, tr_ivar, tr_label, tr_id, tr_flux, tr_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]', 'AKWISE'])
    ds.diagnostics_SNR()
    ds.diagnostics_ref_labels()
    np.savez("./tr_snr.npz", ds.tr_SNR)
    m = model.CannonModel(2)
    m.fit(ds)
    # Persist the trained model components.
    np.savez("./coeffs.npz", m.coeffs)
    np.savez("./scatters.npz", m.scatters)
    np.savez("./chisqs.npz", m.chisqs)
    np.savez("./pivots.npz", m.pivots)
    m.diagnostics_leading_coeffs(ds)
    m.diagnostics_leading_coeffs_triangle(ds)
    m.diagnostics_plot_chisq(ds)
# Collect the bare filenames of the spectra to be labeled.
for ii,val in enumerate(files):
    ids.append(val.split("/")[-1])
wl, flux, ivar = load_spectra(files)
# normalize
norm_flux, norm_ivar = normalize(wl, flux, ivar, L=50)
# import model parameters
modeldir = "/Users/annaho/Github/TheCannon/data/LAMOST/Label_Transfer"
chisq = np.load(modeldir + "/chisqs.npz")['arr_0']
coeff = np.load(modeldir + "/coeffs.npz")['arr_0']
scat = np.load(modeldir + "/scatters.npz")['arr_0']
pivot = np.load(modeldir + "/pivots.npz")['arr_0']
# initialize dataset and model
m = model.CannonModel(2, useErrors=False)
m.coeffs = coeff
# FIX: this previously set `m.chisq`, a stray attribute — every other
# use of CannonModel in this codebase stores this array as `chisqs`.
m.chisqs = chisq
m.scatters = scat
m.pivots = pivot
m.scales = np.ones(len(pivot))
# labels: match each spectrum to its row in the published catalog.
labeldir = "/Users/annaho/Github/TheCannon/data/LAMOST/Label_Transfer"
inputf = pyfits.open("%s/Ho_et_all_catalog_resubmit.fits" %labeldir)
cat = inputf[1].data
inputf.close()
inds = np.array([np.where(cat['LAMOST_ID']==val)[0][0] for val in ids])
ra = cat['RA'][inds]
dec = cat['Dec'][inds]
def test():
    """Run the Cannon test step on the held-out test set.

    Loads the trained model components from DATA_DIR, fits each test
    object from seven random starting guesses, keeps the lowest
    chi-squared fit per object, and saves labels, errors, and chi-squared.
    """
    wl = np.load("%s/wl.npz" %DATA_DIR)['arr_0']
    tr_id = np.load("%s/tr_id.npz" %DATA_DIR)['arr_0']
    tr_flux = np.load("%s/tr_flux_norm.npz" %DATA_DIR)['arr_0']
    tr_ivar = np.load("%s/tr_ivar_norm.npz" %DATA_DIR)['arr_0']
    test_id = np.load("%s/test_id.npz" %DATA_DIR)['arr_0']
    test_flux = np.load("%s/test_flux_norm.npz" %DATA_DIR)['arr_0']
    test_ivar = np.load("%s/test_ivar_norm.npz" %DATA_DIR)['arr_0']
    tr_label = np.load("%s/tr_label.npz" %DATA_DIR)['arr_0']
    coeffs = np.load("%s/coeffs.npz" %DATA_DIR)['arr_0']
    scatters = np.load("%s/scatters.npz" %DATA_DIR)['arr_0']
    chisqs = np.load("%s/chisqs.npz" %DATA_DIR)['arr_0']
    pivots = np.load("%s/pivots.npz" %DATA_DIR)['arr_0']
    # Only the first four label columns are used.
    ds = dataset.Dataset(
        wl, tr_id, tr_flux, tr_ivar, tr_label[:,0:4],
        test_id, test_flux, test_ivar)
    np.savez("%s/test_SNR.npz" %DATA_DIR, ds.test_SNR)
    ds.set_label_names(["Teff", "logg", "FeH", "aFe"])
    # Rehydrate the trained model from its saved components.
    md = model.CannonModel(2)
    md.coeffs = coeffs
    md.scatters = scatters
    md.chisqs = chisqs
    md.pivots = pivots
    md.diagnostics_leading_coeffs(ds)
    nguesses = 7
    nobj = len(ds.test_ID)
    nlabels = ds.tr_label.shape[1]
    # Random training stars provide the starting guesses, expressed as
    # offsets from the model pivots.
    choose = np.random.randint(0,nobj,size=nguesses)
    starting_guesses = ds.tr_label[choose]-md.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    # NOTE(review): tr_label is zeroed (sized to the test set) before the
    # fit loop — presumably so test_step_iteration sees label arrays of
    # matching size; confirm against that helper's implementation.
    ds.tr_label = np.zeros((nobj, nlabels))
    for ii,guess in enumerate(starting_guesses):
        a,b,c = test_step_iteration(ds,md,starting_guesses[ii])
        labels[ii,:] = a
        chisq[ii,:] = b
        errs[ii,:] = c
    np.savez("%s/labels_all_starting_vals.npz" %DATA_DIR, labels)
    np.savez("%s/chisq_all_starting_vals.npz" %DATA_DIR, chisq)
    np.savez("%s/errs_all_starting_vals.npz" %DATA_DIR, errs)
    # For each object, keep the guess with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj,val in enumerate(choose):
        best_labels[jj,:] = labels[:,jj,:][val]
        best_errs[jj,:] = errs[:,jj,:][val]
    np.savez("%s/test_cannon_labels.npz" %DATA_DIR, best_labels)
    np.savez("%s/test_errs.npz" %DATA_DIR, best_errs)
    np.savez("%s/test_chisq.npz" %DATA_DIR, best_chisq)
    ds.test_label_vals = best_labels
# (2) IDENTIFY CONTINUUM PIXELS
pseudo_tr_flux, pseudo_tr_ivar = ds.continuum_normalize_training_q(
    q=0.90, delta_lambda=50)
# Pixel-index ranges over which continuum is fitted separately.
ds.ranges = [[371, 3192], [3697, 5500], [5500, 5997], [6461, 8255]]
contmask = ds.make_contmask(pseudo_tr_flux, pseudo_tr_ivar, frac=0.07)
ds.set_continuum(contmask)
cont = ds.fit_continuum(3, "sinusoid")
norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar = \
    ds.continuum_normalize(cont)
# Replace the raw spectra with their normalized versions.
ds.tr_flux = norm_tr_flux
ds.tr_ivar = norm_tr_ivar
ds.test_flux = norm_test_flux
ds.test_ivar = norm_test_ivar
from TheCannon import model
md = model.CannonModel(2)
md.fit(ds)
md.diagnostics_contpix(ds)
md.diagnostics_leading_coeffs(ds)
md.diagnostics_plot_chisq(ds)
label_errs = md.infer_labels(ds)
test_labels = ds.test_label_vals
ds.diagnostics_test_step_flagstars()
ds.diagnostics_survey_labels()
# FIX: this previously called `dset.diagnostics_1to1()` — `dset` is
# undefined (NameError); the dataset object is named `ds` throughout.
ds.diagnostics_1to1()
def loop(num_sets):
    """Run leave-one-fold-out cross-validation over *num_sets* folds.

    Selects the reference objects listed in DATA_DIR from the column-
    augmented spectra in SPEC_DIR, then for each fold trains (or reloads)
    a Cannon model on the remaining folds and validates on the held-out
    fold.
    """
    wl = np.load("%s/wl_cols.npz" % SPEC_DIR)['arr_0']
    label_names = [
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/M]', 'A_k'
    ]
    # Restrict the column-augmented reference set to the chosen IDs.
    ref_id = np.load("%s/ref_id_col.npz" % SPEC_DIR)['arr_0']
    ref_choose = np.load("%s/ref_id.npz" % DATA_DIR)['arr_0']
    inds = np.array([np.where(ref_id == val)[0][0] for val in ref_choose])
    ref_id = ref_id[inds]
    ref_flux = np.load("%s/ref_flux_col.npz" % SPEC_DIR)['arr_0'][inds]
    ref_ivar = np.load("%s/ref_ivar_col.npz" % SPEC_DIR)['arr_0'][inds]
    np.savez("ref_id.npz", ref_id)
    np.savez("ref_flux.npz", ref_flux)
    np.savez("ref_ivar.npz", ref_ivar)
    # Temporary dataset over the first 3626 pixels, used only to compute
    # and save the reference SNR.
    ds = dataset.Dataset(wl[0:3626], ref_id, ref_flux[:, 0:3626],
                         ref_ivar[:, 0:3626], [], [], [], [])
    np.savez("ref_snr.npz", ds.tr_SNR)
    ref_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0'][inds]
    #ref_label = np.load("%s/xval_cannon_label_vals.npz" %TR_LAB_DIR)['arr_0']
    np.savez("ref_label.npz", ref_label)
    # Fold assignment per reference object.
    assignments = np.load("%s/../assignments.npz" % DATA_DIR)['arr_0']
    print("looping through %s sets" % num_sets)
    for leave_out in range(0, num_sets):
        print("leaving out %s" % leave_out)
        training = assignments != leave_out
        test = assignments == leave_out
        # Split into training fold(s)...
        tr_id = ref_id[training]
        tr_flux = ref_flux[training]
        tr_ivar = ref_ivar[training]
        tr_ivar[np.isnan(tr_ivar)] = 0.0  # NaN ivar -> zero weight
        tr_label = ref_label[training]
        np.savez("tr_set_%s.npz" % leave_out, tr_id, tr_flux, tr_ivar,
                 tr_label)
        # ...and the held-out test fold.
        test_id = ref_id[test]
        test_flux = ref_flux[test]
        test_ivar = ref_ivar[test]
        test_ivar[np.isnan(test_ivar)] = 0.0  # NaN ivar -> zero weight
        test_label = ref_label[test]
        np.savez("test_set_%s.npz" % leave_out, test_id, test_flux,
                 test_ivar, test_label)
        ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label,
                             test_id, test_flux, test_ivar)
        print(ds.wl)
        ds.set_label_names(label_names)
        fig = ds.diagnostics_SNR()
        plt.savefig("SNRdist_%s.png" % leave_out)
        plt.close()
        fig = ds.diagnostics_ref_labels()
        plt.savefig("ref_label_triangle_%s.png" % leave_out)
        plt.close()
        np.savez("tr_snr_%s.npz" % leave_out, ds.tr_SNR)
        # Reuse a previously trained fold model if it exists on disk.
        modelf = "model_%s.npz" % leave_out
        if glob.glob(modelf):
            print("model already exists")
            coeffs = np.load(modelf)['arr_0']
            scatters = np.load(modelf)['arr_1']
            chisqs = np.load(modelf)['arr_2']
            pivots = np.load(modelf)['arr_3']
            m = model.CannonModel(2)
            m.coeffs = coeffs
            m.scatters = scatters
            m.chisqs = chisqs
            m.pivots = pivots
        else:
            m = train(ds, leave_out)
        # Hand the held-out fold's labels to validation.
        ds.tr_label = test_label
        validate(ds, m, leave_out)
def plot(ii, wl, flux, ivar, model_all, coeffs, scatters, chisqs, pivots,
         start_wl, end_wl, highlights, figname):
    """Plot one observed spectrum against its Cannon model spectrum.

    Draws a two-panel figure (residuals on top, data + model below) over
    the wavelength window [start_wl, end_wl], shades the model error
    band, marks each wavelength in *highlights* with a dashed red line,
    and saves the figure to *figname*.

    :param ii: row index of the object within flux/ivar/model_all.
    :param model_all: array of model spectra, one row per object.
    :param coeffs, scatters, chisqs, pivots: trained model components.
    """
    xmin = start_wl
    xmax = end_wl
    f = flux[ii, :]
    iv = ivar[ii, :]
    model_spec = model_all[ii, :]
    # Restrict y-limits to the plotted wavelength window.
    choose = np.logical_and(wl > xmin, wl < xmax)
    ymin = min(f[choose]) - 0.05
    ymax = max(f[choose]) + 0.05
    # err = scat ^2 + uncertainty^2
    m = model.CannonModel(2, useErrors=False)
    m.coeffs = coeffs
    m.scatters = scatters
    m.chisqs = chisqs
    m.pivots = pivots
    scat = m.scatters
    # Total inverse variance folds the model scatter into the data ivar.
    iv_tot = (iv / (scat**2 * iv + 1))
    # Pixels with non-positive ivar get a huge placeholder error.
    err = np.ones(len(iv_tot)) * 1000
    err[iv_tot > 0] = 1 / iv_tot[iv_tot > 0]**0.5
    #print("X2 is: " + str(sum((f - model_spec)**2 * iv_tot)))
    # Cinv = ivars / (1 + ivars*scatter**2)
    # lTCinvl = np.dot(lvec.T, Cinv[:, None] * lvec)
    # lTCinvf = np.dot(lvec.T, Cinv * fluxes)
    # Thanks to David Hogg / Andy Casey for this...
    # I stole it from the Annie's Lasso Github.
    # Layout: small residual panel above a larger spectrum panel.
    gs = gridspec.GridSpec(2, 1, height_ratios=[1, 4])
    fig = plt.figure(figsize=(13.3, 4))
    ax_residual = plt.subplot(gs[0])
    ax_spectrum = plt.subplot(gs[1])
    ax_spectrum.plot(wl, f, c='k', alpha=0.7, drawstyle='steps-mid',
                     label="Data")
    #ax_spectrum.scatter(wl, f, c='k')
    ax_spectrum.plot(wl, model_spec, c='r', alpha=0.7,
                     label="The Cannon Model")
    ax_spectrum.fill_between(wl, model_spec + err, model_spec - err,
                             alpha=0.1, color='r')
    ax_spectrum.set_ylim(ymin, ymax)
    ax_spectrum.set_xlim(xmin, xmax)
    ax_spectrum.axhline(1, c="k", linestyle=":", zorder=-1)
    ax_spectrum.legend(loc="lower right")
    # Residual panel: data minus model with the same error band.
    resid = f - model_spec
    r_ymin = min(resid[choose]) - 0.01
    r_ymax = max(resid[choose]) + 0.01
    ax_residual.plot(wl, resid, c='k', alpha=0.8, drawstyle='steps-mid')
    ax_residual.fill_between(wl, resid + err, resid - err, alpha=0.1,
                             color='k')
    ax_residual.set_ylim(r_ymin, r_ymax)
    ax_residual.set_xlim(ax_spectrum.get_xlim())
    ax_residual.axhline(0, c="k", linestyle=":", zorder=-1)
    # Mark features of interest in the residual panel.
    for highlight in highlights:
        ax_residual.axvline(x=highlight, c='r', linewidth=2, linestyle='--')
    ax_residual.set_xticklabels([])
    ax_residual.yaxis.set_major_locator(MaxNLocator(3))
    ax_residual.xaxis.set_major_locator(MaxNLocator(6))
    ax_spectrum.xaxis.set_major_locator(MaxNLocator(6))
    ax_spectrum.yaxis.set_major_locator(MaxNLocator(4))
    ax_spectrum.set_xlabel(r"Wavelength $\lambda (\AA)$", fontsize=18)
    ax_spectrum.set_ylabel("Normalized flux", fontsize=18)
    ax_spectrum.tick_params(axis="both", labelsize=18)
    ax_residual.tick_params(axis="both", labelsize=18)
    fig.tight_layout()
    # Mark the same features in the current (spectrum) axes.
    for highlight in highlights:
        plt.axvline(x=highlight, c='r', linewidth=2, linestyle='--')
    #plt.show()
    plt.savefig(figname)
    plt.close()
def run(date):
    """Label every LAMOST test object observed on *date*.

    Loads the pre-trained model, reads and (if necessary) continuum-
    normalizes the day's spectra, infers Cannon labels, and saves
    labels / errors / chi-squared plus diagnostic plots.
    """
    # Training step has already been completed. Load the model,
    spectral_model = model.CannonModel(2) # 2 = quadratic model
    spectral_model.coeffs = np.load("./coeffs.npz")['arr_0']
    # NOTE(review): "scatter.npz" (singular) differs from the
    # "scatters.npz" name used elsewhere in this file — confirm which
    # file actually exists on disk.
    spectral_model.scatters = np.load("./scatter.npz")['arr_0']
    spectral_model.chisqs = np.load("./chisqs.npz")['arr_0']
    spectral_model.pivots = np.load("./pivots.npz")['arr_0']
    # Load the wavelength array
    wl = np.load("wl.npz")['arr_0']
    # Load the test set,
    test_ID = np.loadtxt("test_obj/%s_test_obj.txt" %date, dtype=str)
    print("%s test objects" %len(test_ID))
    dir_dat = "/home/share/LAMOST/DR2/DR2_release"
    test_IDs, wl, test_flux, test_ivar = load_spectra(dir_dat, test_ID)
    np.savez("output/%s_ids" %date, test_IDs)
    #np.savez("./%s_data_raw" %date, test_flux, test_ivar)
    # Load the corresponding LAMOST labels,
    labels = np.load("lamost_labels/lamost_labels_%s.npz" %date)['arr_0']
    # Match catalog rows (column 0 = object ID) to the loaded spectra.
    inds = np.array([np.where(labels[:,0]==a)[0][0] for a in test_IDs])
    nstars = len(test_IDs)
    # Three LAMOST labels plus a fourth placeholder column of zeros.
    lamost_labels = np.zeros((nstars,4))
    lamost_labels[:,0:3] = labels[inds,:][:,1:].astype(float)
    np.savez("output/%s_lamost_label" %date, lamost_labels)
    # Set dataset object
    data = dataset.Dataset(
        wl, test_IDs, test_flux, test_ivar, lamost_labels,
        test_IDs, test_flux, test_ivar)
    # set the headers for plotting
    data.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
    # Plot SNR distribution
    data.diagnostics_SNR(figname="%s_SNRdist.png" %date)
    np.savez("output/%s_SNR" %date, data.test_SNR)
    # Continuum normalize,
    filename = "output/%s_norm.npz" %date
    if glob.glob(filename):
        # Reuse cached normalized spectra if they already exist.
        print("already cont normalized")
        data.test_flux = np.load(filename)['arr_0']
        data.test_ivar = np.load(filename)['arr_1']
    else:
        # Shrink the unused training arrays to a single row before the
        # (expensive) smoothing step, then cache the result.
        data.tr_ID = data.tr_ID[0]
        data.tr_flux = data.tr_flux[0,:]
        data.tr_ivar = data.tr_ivar[0,:]
        data.continuum_normalize_gaussian_smoothing(L=50)
        np.savez("output/%s_norm" %date, data.test_flux, data.test_ivar)
    # Infer labels
    errs, chisq = spectral_model.infer_labels(data)
    np.savez("output/%s_cannon_labels.npz" %date, data.test_label_vals)
    np.savez("./%s_formal_errors.npz" %date, errs)
    np.savez("./%s_chisq.npz" %date, chisq)
    # Make plots
    data.test_label_vals = data.test_label_vals[:,0:3] # so it doesn't try alpha
    data.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    data.diagnostics_1to1(figname="%s_1to1_label" %date)
# Reuse a cached continuum fit if one exists; otherwise fit and cache it.
# FIX: pickle files must be opened in binary mode ('rb'/'wb') — text mode
# raises on Python 3 — and the context managers close the handles the
# original left open.
if glob.glob('cont.p'):
    with open('cont.p', 'rb') as cont_file:
        cont = pickle.load(cont_file)
else:
    cont = dataset.fit_continuum(3, "sinusoid")
    with open('cont.p', 'wb') as cont_file:
        pickle.dump(cont, cont_file)

# (3) CONTINUUM NORMALIZE
norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar = \
    dataset.continuum_normalize(cont)

# replace with normalized values
dataset.tr_flux = norm_tr_flux
dataset.tr_ivar = norm_tr_ivar
dataset.test_flux = norm_test_flux
dataset.test_ivar = norm_test_ivar

# (4) TRAINING STEP
# learn the model from the reference_set
model = model.CannonModel(dataset, 2) # 2 = quadratic model
model.fit() # model.train would work equivalently.
model.diagnostics()

# (5) TEST STEP
# infer labels with the new model for the test_set
label_errs = model.infer_labels(dataset)
dataset.diagnostics_test_step_flagstars()
dataset.diagnostics_survey_labels()
dataset.diagnostics_1to1()
def test_step(date):
    """Run the Cannon test step for one observation date.

    Fits every test object from seven fixed starting guesses (label-space
    corners plus high-alpha / low-alpha / low-[Fe/H] representatives),
    keeps the lowest-chi-squared fit per object, and writes results plus
    diagnostic plots under ./output.
    """
    wl = np.load("wl.npz")['arr_0']
    test_ID = np.load("output/%s_ids.npz" % date)['arr_0']
    print(str(len(test_ID)) + " objects")
    # Normalized flux and inverse variance live in the same npz file.
    test_flux = np.load("output/%s_norm.npz" % date)['arr_0']
    test_ivar = np.load("output/%s_norm.npz" % date)['arr_1']
    nlabels = 4
    nobj = len(test_ID)
    lamost_label = np.load("output/%s_tr_label.npz" % date)['arr_0']
    # Only two rows of "training" spectra are needed — the training set
    # is unused in the test step, this just satisfies the constructor.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
    # Rehydrate the trained model from its saved components.
    m = model.CannonModel(2)
    m.coeffs = np.load("./coeffs.npz")['arr_0']
    m.scatters = np.load("./scatters.npz")['arr_0']
    m.chisqs = np.load("./chisqs.npz")['arr_0']
    m.pivots = np.load("./pivots.npz")['arr_0']
    nguesses = 7
    starting_guesses = np.zeros((nguesses, nlabels))
    # Corners of label space (hi/lo Teff+logg crossed with hi/lo [M/H]).
    hiT_hiG_hiM = np.array(
        [5.15273730e+03, 3.71762228e+00, 3.16861898e-01, 2.46907920e-02])
    hiT_hiG_loM = np.array(
        [5.16350098e+03, 3.45917511e+00, -9.24426436e-01, 2.49296919e-01])
    loT_loG_hiM = np.array(
        [4.04936841e+03, 1.47109437e+00, 2.07210138e-01, 1.49733415e-02])
    loT_loG_loM = np.array(
        [4.00651318e+03, 8.35013509e-01, -8.98257852e-01, 7.65705928e-02])
    # Extra guesses covering the alpha-rich / alpha-poor / metal-poor cases.
    high_alpha = np.array([[4750, 2.6, -0.096, 0.25]])
    low_alpha = np.array([[4840, 2.67, -0.045, 0.049]])
    low_feh = np.array([[4500, 1.45, -1.54, 0.24]])
    # Guesses are expressed as offsets from the model pivots.
    starting_guesses[0, :] = hiT_hiG_hiM - m.pivots
    starting_guesses[1, :] = hiT_hiG_loM - m.pivots
    starting_guesses[2, :] = loT_loG_loM - m.pivots
    starting_guesses[3, :] = loT_loG_hiM - m.pivots
    starting_guesses[4, :] = high_alpha - m.pivots
    starting_guesses[5, :] = low_alpha - m.pivots
    starting_guesses[6, :] = low_feh - m.pivots
    labels = np.zeros((nguesses, nobj, nlabels))  # 4,10955,4
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, guess)
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c
    np.savez("output/%s_cannon_label_guesses.npz" % date, labels)
    # FIX: this file previously saved `labels` again; it now saves the
    # per-guess chi-squared values its filename promises.
    np.savez("output/%s_cannon_chisq_guesses.npz" % date, chisq)
    # For each object, keep the guess with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]
    np.savez("output/%s_all_cannon_labels.npz" % date, best_labels)
    np.savez("output/%s_cannon_label_chisq.npz" % date, best_chisq)
    np.savez("output/%s_cannon_label_errs.npz" % date, best_errs)
    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" %
                                 date)
    # 1-to-1 plot for the first three labels only.
    ds.test_label_vals = best_labels[:, 0:3]
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    ds.diagnostics_1to1(figname="%s_1to1_test_label" % date)
def test_step():
    """Label the excised (held-out) objects with the trained model.

    Looks up the excised IDs in the column-augmented reference set, fits
    each from seven random starting guesses, keeps the lowest
    chi-squared fit per object, and saves labels / chi-squared / errors.
    """
    # FIX: the wavelength array must be extracted from the npz archive —
    # the original passed the NpzFile object itself to Dataset.
    wl = np.load(SPEC_DIR + "/wl_cols.npz")['arr_0']
    ref_id_all = np.load(SPEC_DIR + "/ref_id_col.npz")['arr_0']
    excised = np.load(SPEC_DIR + "/excised_obj/excised_ids.npz")['arr_0']
    # Locate each excised object within the full reference set.
    inds = np.array([np.where(ref_id_all == val)[0][0] for val in excised])
    test_ID = ref_id_all[inds]
    print(str(len(test_ID)) + " objects")
    test_flux = np.load("%s/ref_flux_col.npz" % (SPEC_DIR))['arr_0'][inds]
    test_ivar = np.load("%s/ref_ivar_col.npz" % (SPEC_DIR))['arr_0'][inds]
    apogee_label = np.load("%s/ref_label.npz" % (SPEC_DIR))['arr_0'][inds]
    #np.savez("excised_label.npz", apogee_label)
    # Only two rows of "training" spectra are needed — the training set
    # is unused in the test step, this just satisfies the constructor.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         apogee_label, test_ID, test_flux, test_ivar)
    ds.set_label_names([
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/Fe]', 'A_k'
    ])
    np.savez("excised_snr.npz", ds.test_SNR)
    print("DONE")
    # Rehydrate the trained model from its saved components.
    m = model.CannonModel(2)
    m.coeffs = np.load(MODEL_DIR + "/coeffs.npz")['arr_0']
    m.scatters = np.load(MODEL_DIR + "/scatters.npz")['arr_0']
    m.chisqs = np.load(MODEL_DIR + "/chisqs.npz")['arr_0']
    m.pivots = np.load(MODEL_DIR + "/pivots.npz")['arr_0']
    nlabels = len(m.pivots)
    nobj = len(test_ID)
    nguesses = 7
    # Random excised stars provide the starting guesses, expressed as
    # offsets from the model pivots.
    choose = np.random.randint(0, nobj, size=nguesses)
    print(apogee_label.shape)
    print(choose.shape)
    print(m.pivots.shape)
    starting_guesses = apogee_label[choose] - m.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, guess)
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c
    # For each object, keep the guess with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]
    np.savez("excised_all_cannon_labels.npz", best_labels)
    np.savez("excised_cannon_label_chisq.npz", best_chisq)
    np.savez("excised_cannon_label_errs.npz", best_errs)
    ds.test_label_vals = best_labels
    ds.diagnostics_1to1(figname="excised_1to1_test_label")
# Reorder the photometric colors and their inverse variances by tr_ID
# (presumably to match the ordering of the spectra — confirm upstream).
colors = colors[np.argsort(tr_ID)]
ivars = ivars[np.argsort(tr_ID)]
# NOTE(review): magic rescaling of the color ivars — confirm the units
# this factor is compensating for.
ivars = ivars * 1e15
# add another column to the tr_flux, tr_ivar, test_flux, test_ivar
# (the wavelength grid is extended by four extra log-spaced points so the
# appended color columns get nominal "wavelengths")
logwl = np.log(data.wl)
delta = logwl[1]-logwl[0]
toadd = logwl[-1]+delta*np.arange(1,5)
new_logwl = np.hstack((logwl, toadd))
data.wl = np.exp(new_logwl)
data.tr_flux = np.hstack((data.tr_flux, colors))
data.test_flux = data.tr_flux
data.tr_ivar = np.hstack((data.tr_ivar, ivars))
data.test_ivar = data.tr_ivar
# train model
m = model.CannonModel(2) # 2 = quadratic model
m.fit(data)
m.infer_labels(data)
# data.diagnostics_1to1()

def scatter(i):
    # RMS difference between reference and inferred values for label i.
    return np.std(data.tr_label[:,i]-data.test_label_vals[:,i])

def bias(i):
    # Mean offset between reference and inferred values for label i.
    return np.mean(data.tr_label[:,i]-data.test_label_vals[:,i])

# Report scatter and bias for the first four labels.
for i in range(0,4):
    print(scatter(i), bias(i))
def loop(num_sets):
    """Run leave-one-fold-out cross-validation over *num_sets* folds.

    For each fold, trains (or reloads from disk) a Cannon model on the
    remaining folds and validates it on the held-out fold.
    """
    wl = np.load("%s/wl_cols.npz" % SPEC_DIR)['arr_0']
    label_names = np.load("%s/label_names.npz" % DATA_DIR)['arr_0']
    ref_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    #ref_choose = np.load("%s/ref_id_culled.npz" %DATA_DIR)['arr_0']
    #inds = np.array([np.where(ref_id==val)[0][0] for val in ref_choose])
    #ref_id = ref_id[inds]
    ref_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    ref_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']
    ref_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0']
    np.savez("ref_label.npz", ref_label)
    # Fold assignment per reference object.
    assignments = np.load("%s/assignments.npz" % DATA_DIR)['arr_0']
    print("looping through %s sets" % num_sets)
    for leave_out in range(0, num_sets):
        print("leaving out %s" % leave_out)
        training = assignments != leave_out
        test = assignments == leave_out
        # Split into training fold(s)...
        tr_id = ref_id[training]
        tr_flux = ref_flux[training]
        tr_ivar = ref_ivar[training]
        tr_ivar[np.isnan(tr_ivar)] = 0.0  # NaN ivar -> zero weight
        tr_label = ref_label[training]
        #np.savez(
        #    "tr_set_%s.npz" %leave_out,
        #    tr_id, tr_flux, tr_ivar, tr_label)
        # ...and the held-out test fold.
        test_id = ref_id[test]
        test_flux = ref_flux[test]
        test_ivar = ref_ivar[test]
        test_ivar[np.isnan(test_ivar)] = 0.0  # NaN ivar -> zero weight
        test_label = ref_label[test]
        #np.savez(
        #    "test_set_%s.npz" %leave_out,
        #    test_id, test_flux, test_ivar, test_label)
        ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label,
                             test_id, test_flux, test_ivar)
        ds.set_label_names(label_names)
        fig = ds.diagnostics_SNR()
        plt.savefig("SNRdist_%s.png" % leave_out)
        plt.close()
        #fig = ds.diagnostics_ref_labels()
        #plt.savefig("ref_label_triangle_%s.png" %leave_out)
        #plt.close()
        #np.savez("tr_snr_%s.npz" %leave_out, ds.tr_SNR)
        # Reuse a previously trained fold model if it exists on disk.
        modelf = "model_%s.npz" % leave_out
        if glob.glob(modelf):
            print("model already exists")
            coeffs = np.load(modelf)['arr_0']
            scatters = np.load(modelf)['arr_1']
            chisqs = np.load(modelf)['arr_2']
            pivots = np.load(modelf)['arr_3']
            m = model.CannonModel(2)
            m.coeffs = coeffs
            m.scatters = scatters
            m.chisqs = chisqs
            m.pivots = pivots
        else:
            m = train(ds, leave_out)
        # Hand the held-out fold's labels to validation.
        ds.tr_label = test_label
        validate(ds, m, leave_out)
def __init__(self, training_set, label_names, wavelength_arms=None,
             censors=None, progress_bar=False, threads=None, tolerance=None,
             polynomial_order=2, load_from_file=None, debugging=False):
    """
    Instantiate the Cannon and train it on the spectra contained within a
    SpectrumArray.

    :param training_set:
        A SpectrumArray containing the spectra to train the Cannon on.

    :param label_names:
        A list of the names of the labels the Cannon is to estimate. We
        require that all of the training spectra have metadata fields
        defining all of these labels.

    :param wavelength_arms:
        A list of the wavelength break-points between arms which should
        have continuum fitted separately. For compatibility we accept this
        argument, but it is not used for continuum-normalised spectra.

    :param threads:
        The number of CPU cores we should use. If None, we look up how
        many cores this computer has.

    :param tolerance:
        The tolerance xtol which the method <scipy.optimize.fmin_powell>
        uses to determine convergence.

    :param polynomial_order:
        The order of polynomials to use as fitting functions within the
        Cannon.

    :param load_from_file:
        The filename of the internal state of a pre-trained Cannon, which
        we should load rather than doing training from scratch.

    :param debugging:
        Boolean flag determining whether we produce debugging output

    :type debugging:
        bool
    """
    assert polynomial_order == 2, "Anna Ho's Cannon only supports quadratic polynomials. " \
                                  "You requested <polynomial_order={}>.".format(polynomial_order)
    # FIX: identity comparison with None (PEP 8) instead of `== None`.
    assert censors is None, "Anna Ho's Cannon does not support censoring. " \
                            "But you requested that it should be enabled."

    self._debugging_output_counter = 0
    self._debugging = debugging
    self.cannon_version = "AnnaHo"
    self._label_names = label_names
    self._wavelength_arms = wavelength_arms
    logger.info("Wavelength arm breakpoints: {}".format(
        self._wavelength_arms))

    assert isinstance(training_set, fourgp_speclib.SpectrumArray), \
        "Training set for the Cannon should be a SpectrumArray."

    # Hook for normalising input spectra
    training_set = self.normalise(training_set)
    self._training_set = training_set

    # Turn error bars on fluxes into inverse variances
    inverse_variances = training_set.value_errors**(-2)
    # Flag bad data points: negative fluxes or non-finite ivars get zero
    # weight and a placeholder flux of 1.
    ignore = (training_set.values < 0) + ~np.isfinite(inverse_variances)
    inverse_variances[ignore] = 0
    training_set.values[ignore] = 1

    # Check that labels are correctly set in metadata
    for index in range(len(training_set)):
        metadata = training_set.get_metadata(index)
        for label in label_names:
            assert label in metadata, "Label <{}> not set on training spectrum number {}. " \
                                      "Labels on this spectrum are: {}.".format(
                label, index, ", ".join(list(metadata.keys())))
            assert np.isfinite(metadata[label]), "Label <{}> is not finite on training spectrum number {}. " \
                                                 "Labels on this spectrum are: {}.".format(
                label, index, metadata)

    # Compile table of training values of labels from metadata contained in SpectrumArray
    dataset = ho_dataset.Dataset(
        wl=training_set.wavelengths,
        tr_ID=range(len(training_set)),
        tr_flux=training_set.values,
        tr_ivar=inverse_variances,
        tr_label=np.array([
            np.array([
                training_set.get_metadata(index)[label]
                for label in label_names
            ]) for index in range(len(training_set))
        ]),
        test_ID=[],
        test_flux=[],
        test_ivar=[])
    dataset.set_label_names(names=label_names)

    self._model = ho_model.CannonModel(order=2, useErrors=False)
    if load_from_file is None:
        logger.info("Starting to train the Cannon")
        self._model.train(ds=dataset)
        logger.info("Cannon training completed")
    else:
        logger.info("Loading Cannon from disk")
        # FIX: open the pickle inside a context manager so the file
        # handle is closed (the original leaked the open file object).
        with open(load_from_file, "rb") as model_file:
            self._model = pickle.load(model_file)
        logger.info("Cannon loaded successfully")