Пример #1
0
def prep_data(date):
    """Load, label, and continuum-normalize one date's LAMOST test spectra.

    Reads the object list for *date*, loads the raw spectra, matches each
    object to its LAMOST labels, and writes IDs, SNRs, good-pixel fractions,
    labels, and normalized flux/ivar to ``output/``.

    Parameters
    ----------
    date: str
        Observation date string used to locate the input files.
    """
    dir_files = "/home/annaho/xcalib_4labels/test_obj"
    dir_dat = "/home/share/LAMOST/DR2/DR2_release/"
    test_ID = np.loadtxt("%s/%s_test_obj.txt" % (dir_files, date), dtype=str)
    print("%s obj" % len(test_ID))
    np.savez("output/%s_ids.npz" % date, test_ID)
    test_ID_long = np.array([dir_dat + f for f in test_ID])
    wl, test_flux, test_ivar, npix, SNRs = load_spectra(test_ID_long)
    np.savez("output/%s_SNRs.npz" % date, SNRs)
    np.savez("output/%s_frac_good_pix.npz" % date, npix)

    # Match each test ID to its row in the LAMOST label table
    # (column 0 holds the ID string, columns 1+ hold the label values).
    lamost_info = np.load("lamost_labels/lamost_labels_%s.npz" % date)['arr_0']
    inds = np.array([np.where(lamost_info[:, 0] == a)[0][0] for a in test_ID])
    lamost_label = lamost_info[inds, :][:, 1:].astype(float)
    # np.savez appends the ".npz" suffix automatically when it is missing.
    np.savez("output/%s_tr_label" % date, lamost_label)

    # Training block is a 2-object stub; the full arrays go into the test
    # block.  (Removed an unused ``lamost_info_sorted`` buffer that was
    # allocated and partially filled but never read or saved.)
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
    ds.diagnostics_SNR(figname="%s_SNRdist.png" % date)

    ds.continuum_normalize_gaussian_smoothing(L=50)
    np.savez("output/%s_norm.npz" % date, ds.test_flux, ds.test_ivar)
Пример #2
0
def test_step(date):
    """Infer Cannon labels for one date's test spectra.

    Loads a previously trained quadratic model, fits every test spectrum
    from four different starting guesses, and keeps the fit with the
    lowest chi-squared per object.  Results are written to ``.npz`` files
    named after *date*.

    Parameters
    ----------
    date: the date (string) identifying the input files
    """
    wl = np.load("../run_2_train_on_good/wl.npz")['arr_0']
    test_ID = np.load("%s_test_ids.npz" %date)['arr_0']
    test_flux = np.load("%s_test_flux.npz" %date)['arr_0']
    test_ivar = np.load("%s_test_ivar.npz" %date)['arr_0']

    nlabels = 4
    nobj = len(test_ID)

    lamost_label_3 = np.load("%s_lamost_label.npz" %date)['arr_0']
    # add extra column to make it symmetric with the inferred test labels
    toadd = np.ones(nobj)[...,None]
    lamost_label = np.hstack((lamost_label_3, toadd))

    # Training block is a 2-object stub; the full arrays go into the
    # test block of the Dataset.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2,:], test_ivar[0:2,:], lamost_label, 
            test_ID, test_flux, test_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])

    # Restore a trained quadratic (order-2) Cannon model from disk.
    m = model.CannonModel(2)
    m.coeffs = np.load("../run_5_train_on_good/coeffs.npz")['arr_0']
    m.scatters = np.load("../run_5_train_on_good/scatters.npz")['arr_0']
    m.chisqs = np.load("../run_5_train_on_good/chisqs.npz")['arr_0']
    m.pivots = np.load("../run_5_train_on_good/pivots.npz")['arr_0']

    # Four starting guesses covering hi/lo temperature-gravity-metallicity
    # corners (values in physical units, converted to pivot-relative below).
    nguesses = 4
    starting_guesses = np.zeros((nguesses,nlabels)) 
    hiT_hiG_hiM = np.array([  5.15273730e+03,   3.71762228e+00,   3.16861898e-01, 2.46907920e-02])
    hiT_hiG_loM = np.array([  5.16350098e+03,   3.45917511e+00,  -9.24426436e-01, 2.49296919e-01])
    loT_loG_hiM = np.array([  4.04936841e+03,   1.47109437e+00,   2.07210138e-01, 1.49733415e-02])
    loT_loG_loM = np.array([  4.00651318e+03,   8.35013509e-01,  -8.98257852e-01, 7.65705928e-02])
    starting_guesses[0,:] = hiT_hiG_hiM-m.pivots
    starting_guesses[1,:] = hiT_hiG_loM-m.pivots
    starting_guesses[2,:] = loT_loG_loM-m.pivots
    starting_guesses[3,:] = loT_loG_hiM-m.pivots

    # Per-guess results: (nguesses, nobj, nlabels) labels and matching errors,
    # plus one chi-squared per guess and object.
    labels = np.zeros((nguesses, nobj, nlabels)) # 4,10955,4
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    
    # Fit all objects once per starting guess.
    for ii,guess in enumerate(starting_guesses):
        a,b,c = test_step_iteration(ds,m,starting_guesses[ii])
        labels[ii,:] = a
        chisq[ii,:] = b
        errs[ii,:] = c

    # For each object, keep the guess that produced the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj,val in enumerate(choose):
        best_labels[jj,:] = labels[:,jj,:][val]
        best_errs[jj,:] = errs[:,jj,:][val]

    np.savez("./%s_all_cannon_labels.npz" %date, best_labels)
    np.savez("./%s_cannon_label_chisq.npz" %date, best_chisq)
    np.savez("./%s_cannon_label_errs.npz" %date, best_errs)

    # Attach the winning labels and render the diagnostic plots.
    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" %date)
    ds.diagnostics_1to1(figname = "%s_1to1_test_label.png" %date)
Пример #3
0
def test_step(date):
    """Infer Cannon labels for one date's test spectra.

    Runs the trained model from several randomly drawn APOGEE reference
    labels as starting guesses and keeps, per object, the fit with the
    lowest chi-squared.  Outputs are saved under ``output/``.

    Parameters
    ----------
    date: str
        Observation date string used to locate the input files.
    """
    direc = "../xcalib_4labels"
    wl = np.load("%s/wl.npz" % direc)['arr_0']
    test_ID = np.load("%s/output/%s_ids.npz" % (direc, date))['arr_0']
    print(str(len(test_ID)) + " objects")
    test_flux = np.load("%s/output/%s_norm.npz" % (direc, date))['arr_0']
    test_ivar = np.load("%s/output/%s_norm.npz" % (direc, date))['arr_1']

    lamost_label = np.load("%s/output/%s_tr_label.npz" %
                           (direc, date))['arr_0']
    apogee_label = np.load("./tr_label.npz")['arr_0']

    # Training block is a 2-object stub; full arrays go in the test block.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(
        ['T_{eff}', '\log g', '[Fe/H]', '[\\alpha/Fe]', 'log M', 'A_k'])

    # Restore a trained quadratic (order-2) Cannon model.
    m = model.CannonModel(2)
    m.coeffs = np.load("./coeffs.npz")['arr_0']
    m.scatters = np.load("./scatters.npz")['arr_0']
    m.chisqs = np.load("./chisqs.npz")['arr_0']
    m.pivots = np.load("./pivots.npz")['arr_0']

    nlabels = len(m.pivots)
    nobj = len(test_ID)

    # Random reference stars serve as pivot-relative starting guesses.
    nguesses = 7
    choose = np.random.randint(0, apogee_label.shape[0], size=nguesses)
    starting_guesses = apogee_label[choose, :] - m.pivots

    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    # Fit all objects once per starting guess.
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c

    np.savez("output/%s_cannon_label_guesses.npz" % date, labels)
    # BUG FIX: the chisq file previously saved ``labels`` a second time;
    # it now saves the per-guess chi-squared values.
    np.savez("output/%s_cannon_chisq_guesses.npz" % date, chisq)

    # Per object, keep the guess that produced the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[val, jj, :]
        best_errs[jj, :] = errs[val, jj, :]

    np.savez("output/%s_all_cannon_labels.npz" % date, best_labels)
    np.savez("output/%s_cannon_label_chisq.npz" % date, best_chisq)
    np.savez("output/%s_cannon_label_errs.npz" % date, best_errs)

    ds.test_label_vals = best_labels
    #ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" %date)
    # The 1-to-1 diagnostic only covers the first three labels.
    ds.test_label_vals = best_labels[:, 0:3]
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    ds.diagnostics_1to1(figname="%s_1to1_test_label" % date)
Пример #4
0
def normalize_test_set():
    """Continuum-normalize the test spectra using a fixed continuum mask.

    Pixels whose flux deviates from unity by 3 sigma or more are excluded
    from the continuum fit, a 3-term sinusoid continuum is fit, the
    spectra are divided by it, and the results are saved under DATA_DIR.
    """
    wl = np.load("%s/wl.npz" %DATA_DIR)['arr_0']
    test_id = np.load("%s/test_id.npz" %DATA_DIR)['arr_0']
    test_flux = np.load("%s/test_flux.npz" %DATA_DIR)['arr_0']
    test_ivar = np.load("%s/test_ivar_corr.npz" %DATA_DIR)['arr_0']
    test_scat = np.load("%s/test_spec_scat.npz" %DATA_DIR)['arr_0']

    contmask = np.load("%s/wl_contmask.npz" %DATA_DIR)['arr_0']

    # Training block is a 2-object stub; ``wl`` is passed in the label slot
    # as a placeholder since labels are not needed for normalization.
    ds = dataset.Dataset(
            wl, test_id[0:2], test_flux[0:2], test_ivar[0:2], wl, 
            test_id, test_flux, test_ivar)
    ds.set_continuum(contmask)

    # For the sake of the normalization, no pixel with flux >= 3 sigma
    # should be continuum. 

    for ii,spec in enumerate(ds.test_flux): 
        err = test_scat[ii]
        bad = np.logical_and(
                ds.contmask == True, np.abs(1-spec) >= 3*err)
        # SMALL comes from module scope; assigning it effectively removes
        # the pixel's weight from the continuum fit.
        ds.test_ivar[ii][bad] = SMALL

    cont = ds.fit_continuum(3, "sinusoid")
    np.savez("%s/test_cont.npz" %DATA_DIR, cont)
    norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar = \
            ds.continuum_normalize(cont)
    # NOTE(review): this mask is computed from the *raw* test_flux, not the
    # normalized flux, yet is applied to the normalized ivar — confirm
    # that is intentional.
    bad = np.logical_or(test_flux <= 0, test_flux > 1.1)
    norm_test_ivar[bad] = 0.0
    np.savez("%s/test_flux_norm.npz" %DATA_DIR, norm_test_flux)
    np.savez("%s/test_ivar_norm.npz" %DATA_DIR, norm_test_ivar)
Пример #5
0
def train():
    """Train a 3-label quadratic Cannon model on the reference set.

    Loads reference spectra (with photometric color columns appended to
    the wavelength grid), keeps the first three labels, fits the model,
    and saves its components (coeffs, scatters, chisqs, pivots) to the
    current directory.
    """
    wl = np.load("%s/../wl_cols.npz" % SPEC_DIR)['arr_0']
    tr_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    tr_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0']
    # Keep only the first three labels: Teff, logg, [Fe/H].
    tr_label = tr_label[:, 0:3]
    tr_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    tr_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']

    # The reference set doubles as the test block here.
    ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label, tr_id, tr_flux,
                         tr_ivar)
    # teff, logg, mh, cm, nm, am, ak
    ds.set_label_names(['T_{eff}', '\log g', '[Fe/H]'])  #, '[C/M]','[N/M]',
    #'[\\alpha/M]', 'A_k'])
    #ds.diagnostics_SNR()
    #ds.diagnostics_ref_labels()
    #np.savez("ref_snr.npz", ds.tr_SNR)

    print("Training model")
    nlab = ds.tr_label.shape[1]
    print(nlab)
    npix = len(ds.wl)
    print(npix)
    # Pixel filter: every pixel contributes to every label's fit.
    filt = np.ones((nlab, npix), dtype=bool)
    print(filt)
    #filt[nlab-1,0:500] = 0
    m = model.CannonModel(2, wl_filter=filt)
    m.fit(ds)
    # Persist the trained model components for later test steps.
    np.savez("./coeffs.npz", m.coeffs)
    np.savez("./scatters.npz", m.scatters)
    np.savez("./chisqs.npz", m.chisqs)
    np.savez("./pivots.npz", m.pivots)
    m.diagnostics_leading_coeffs(ds)
Пример #6
0
def test_step():
    """Self-test: infer Cannon labels for the reference objects themselves.

    Uses a previously trained ("culled") model and ten randomly chosen
    reference stars as starting guesses, keeping the lowest-chi-squared
    fit per object.  Results are saved to the current directory.
    """
    #wl = np.load("%s/wl.npz" %SPEC_DIR)['arr_0']
    wl = np.load("wl_cols.npz")['arr_0']
    test_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    tr_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0']
    test_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    test_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']

    #tr_id = np.load("./ref_id.npz")['arr_0']
    #tr_flux = np.load("./ref_flux.npz")['arr_0']
    #tr_ivar = np.load("./ref_ivar.npz")['arr_0']
    #tr_label = np.load("./ref_label.npz")['arr_0']

    # Reference spectra fill both the training and the test blocks.
    ds = dataset.Dataset(wl, test_id, test_flux, test_ivar, tr_label, test_id,
                         test_flux, test_ivar)
    ds.set_label_names([
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/M]', 'A_k'
    ])

    # Restore the trained quadratic model from the culled component files.
    m = model.CannonModel(2)
    m.coeffs = np.load("./culled_coeffs.npz")['arr_0']
    m.scatters = np.load("./culled_scatters.npz")['arr_0']
    m.chisqs = np.load("./culled_chisqs.npz")['arr_0']
    m.pivots = np.load("./culled_pivots.npz")['arr_0']

    # Random reference stars serve as pivot-relative starting guesses.
    nguesses = 10
    nobj = len(ds.test_ID)
    nlabels = len(m.pivots)
    choose = np.random.randint(0, nobj, size=nguesses)
    starting_guesses = ds.tr_label[choose] - m.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    # Fit all objects once per starting guess.
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c

    #np.savez("labels_all_starting_vals.npz", labels)
    #np.savez("chisq_all_starting_vals.npz", chisq)
    #np.savez("errs_all_starting_vals.npz", errs)

    # Per object, keep the guess with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    # NOTE(review): best_labels is sized from tr_label, i.e. the full label
    # table; if nlabels (= len(m.pivots)) differs from tr_label.shape[1]
    # the row assignments below will not line up — confirm the shapes agree.
    best_labels = np.zeros(tr_label.shape)
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]

    np.savez("./cannon_label_vals.npz", best_labels)
    np.savez("./cannon_label_chisq.npz", best_chisq)
    np.savez("./cannon_label_errs.npz", best_errs)

    # Attach winning labels and render diagnostics.
    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels()
    ds.diagnostics_1to1(figname="1to1_test_label")
def get_normed_spectra():
    """Load and continuum-normalize the module-level ``lamost_id`` spectra.

    Relies on the globals ``lamost_id``, ``wl``, and ``DATA_DIR``.
    Saves the normalized flux and inverse variance to DATA_DIR and
    returns them.

    Returns
    -------
    (tr_flux, tr_ivar): the normalized flux and inverse-variance arrays.
    """
    # (Removed an unused ``filenames`` list that was built but never read.)
    grid, fluxes, ivars, npix, SNRs = lamost.load_spectra(
            lamost_id, input_grid=wl)
    # Full arrays go into the training block; the test block is a
    # 2-object stub, and ``[1]`` is a placeholder label array.
    ds = dataset.Dataset(
            wl, lamost_id, fluxes, ivars, [1], 
            lamost_id[0:2], fluxes[0:2], ivars[0:2])
    ds.continuum_normalize_gaussian_smoothing(L=50)
    np.savez(DATA_DIR + "/" + "norm_flux.npz", ds.tr_flux)
    np.savez(DATA_DIR + "/" + "norm_ivar.npz", ds.tr_ivar)
    return ds.tr_flux, ds.tr_ivar
Пример #8
0
    def infer_labels_from_spectra(self, fluxes_in, flux_vars_in):
        """Infer Cannon labels for the given spectra.

        Builds a Dataset whose training block is the stored training data
        and whose test block is the supplied spectra (ivar = 1/variance),
        runs label inference, and returns the inferred label values.
        """
        n_spectra = fluxes_in.shape[0]
        spec_ids = ["{:02d}".format(idx) for idx in range(n_spectra)]
        train = self.m_train_dataset_newtype
        ds_test = dataset.Dataset(train.m_wavelength, spec_ids,
                                  train.m_spectra, train.m_spectra_ivar,
                                  train.m_label, spec_ids, fluxes_in,
                                  1.0 / flux_vars_in)
        ds_test.set_label_names(['L1', 'L2', 'L3', 'L4', 'L5', 'L6', 'L7'])
        # infer_labels stores its results on the dataset itself.
        _, _ = self.m_model.infer_labels(ds_test)
        return ds_test.test_label_vals
Пример #9
0
    def fit_spectrum(self, spectrum):
        """
        Fit stellar labels to a continuum-normalised spectrum.

        :param spectrum:
            A Spectrum object containing the spectrum for the Cannon to fit.

        :type spectrum:
            Spectrum

        :return:
            Tuple (labels, cov, meta): the inferred label values from the
            dataset, the error structure returned by ``infer_labels``
            (NOTE(review): presumably covariances — confirm against the
            Cannon's API), and ``None`` for metadata.
        """

        assert isinstance(spectrum, fourgp_speclib.Spectrum), \
            "Supplied spectrum for the Cannon to fit is not a Spectrum object."

        assert spectrum.raster_hash == self._training_set.raster_hash, \
            "Supplied spectrum for the Cannon to fit is not sampled on the same raster as the training set."

        # Hook for normalising input spectra
        spectrum = self.normalise(spectrum)

        # Inverse variance from the per-pixel errors.
        inverse_variances = spectrum.value_errors**(-2)

        # Ignore bad pixels.
        # ``+`` on boolean arrays acts as logical OR: a pixel is bad when
        # its error is negative or the weighted value is not finite.
        bad = (spectrum.value_errors <
               0) + (~np.isfinite(inverse_variances * spectrum.values))
        inverse_variances[bad] = 0
        # NOTE(review): this mutates the (normalised) spectrum in place.
        spectrum.values[bad] = np.nan

        # Compile table of training values of labels from metadata contained in SpectrumArray
        # The training block is empty; only the single test spectrum is fit.
        dataset = ho_dataset.Dataset(wl=spectrum.wavelengths,
                                     tr_ID=[],
                                     tr_flux=[],
                                     tr_ivar=[],
                                     tr_label=[],
                                     test_ID=np.array((0, )),
                                     test_flux=np.array((spectrum.values, )),
                                     test_ivar=np.array((inverse_variances, )))

        dataset.set_label_names(names=self._label_names)
        errs_all, chisq_all = self._model.infer_labels(ds=dataset)

        # Inferred labels are stored on the dataset by infer_labels.
        labels = dataset.test_label_vals
        cov = errs_all
        meta = None

        return labels, cov, meta
Пример #10
0
def xvalidate():
    """ Train a model, leaving out a group corresponding
    to a random integer from 0 to 7, e.g. leave out 0. 
    Test on the remaining 1/8 of the sample.

    Iterates over all 8 groups: for each, trains on the other 7/8 of the
    reference set (via the sibling ``train`` function) and runs the test
    step on the held-out group (via the sibling ``test`` function).
    """

    print("Loading data")
    groups = np.load("ref_groups.npz")['arr_0']
    ref_label = np.load("%s/ref_label.npz" % direc_ref)['arr_0']
    ref_id = np.load("%s/ref_id.npz" % direc_ref)['arr_0']
    ref_flux = np.load("%s/ref_flux.npz" % direc_ref)['arr_0']
    ref_ivar = np.load("%s/ref_ivar.npz" % direc_ref)['arr_0']
    wl = np.load("%s/wl.npz" % direc_ref)['arr_0']

    num_models = 8

    for ii in np.arange(num_models):
        print("Leaving out group %s" % ii)
        # Boolean masks: train on everything except group ii.
        train_on = groups != ii
        test_on = groups == ii

        tr_label = ref_label[train_on]
        tr_id = ref_id[train_on]
        tr_flux = ref_flux[train_on]
        tr_ivar = ref_ivar[train_on]
        print("Training on %s objects" % len(tr_id))
        test_label = ref_label[test_on]
        test_id = ref_id[test_on]
        test_flux = ref_flux[test_on]
        test_ivar = ref_ivar[test_on]
        print("Testing on %s objects" % len(test_id))

        print("Loading dataset...")
        ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label, test_id,
                             test_flux, test_ivar)
        ds.set_label_names(
            ['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]', 'AKWISE'])
        fig = ds.diagnostics_SNR()
        plt.savefig("ex%s_SNR.png" % ii)
        fig = ds.diagnostics_ref_labels()
        plt.savefig("ex%s_ref_label_triangle.png" % ii)
        np.savez("ex%s_tr_snr.npz" % ii, ds.tr_SNR)

        # train a model
        m = train(ds, ii)

        # test step
        ds.tr_label = test_label  # to compare the results
        test(ds, m, ii)
Пример #11
0
    def train(self, train_dataset):
        """Fit the Cannon model to *train_dataset*.

        Keeps a deep copy of the incoming dataset, then builds a
        TheCannon-style Dataset in which the training spectra also fill
        the test block, names the labels by column index, and fits the
        stored model.

        Parameters
        ----------
        train_dataset: object with m_wavelength, m_spectra,
            m_spectra_ivar, and m_label attributes.
        """
        lis = range(train_dataset.m_label.shape[0])
        ids = ["{:02d}".format(x) for x in lis]

        self.m_train_dataset_newtype = deepcopy(train_dataset)
        self.m_train_dataset = dataset.Dataset(train_dataset.m_wavelength, ids,
                                               train_dataset.m_spectra,
                                               train_dataset.m_spectra_ivar,
                                               train_dataset.m_label, ids,
                                               train_dataset.m_spectra,
                                               train_dataset.m_spectra_ivar)
        # BUG FIX: ``np.str`` was deprecated in NumPy 1.20 and removed in
        # 1.24; the builtin ``str`` is the documented drop-in replacement.
        self.m_train_dataset.set_label_names(
            np.array(range(train_dataset.m_label.shape[1]), dtype=str))

        self.m_model.fit(self.m_train_dataset)
Пример #12
0
def normalize_ref_set():
    """Continuum-normalize the reference spectra and write them to disk.

    Fits a 3-term sinusoid continuum using the saved continuum-pixel
    mask, divides it out, zeroes the weight of unphysical pixels, and
    saves the continuum plus normalized flux/ivar under DATA_DIR.
    """
    # Load the wavelength grid, spectra, weights, labels, and mask.
    wl = np.load("%s/wl.npz" % DATA_DIR)['arr_0']
    ref_id = np.load("%s/ref_id.npz" % DATA_DIR)['arr_0']
    ref_flux = np.load("%s/ref_flux.npz" % DATA_DIR)['arr_0']
    ref_ivar = np.load("%s/ref_ivar.npz" % DATA_DIR)['arr_0']
    ref_label = np.load("%s/ref_label.npz" % DATA_DIR)['arr_0']
    contmask = np.load("%s/wl_contmask.npz" % DATA_DIR)['arr_0']

    # The reference set fills both the training and the test blocks.
    ds = dataset.Dataset(wl, ref_id, ref_flux, ref_ivar, ref_label,
                         ref_id, ref_flux, ref_ivar)
    ds.set_continuum(contmask)

    # Fit and persist the continuum, then normalize by it.
    continuum = ds.fit_continuum(3, "sinusoid")
    np.savez("%s/ref_cont.npz" % DATA_DIR, continuum)
    normalized = ds.continuum_normalize(continuum)
    flux_norm, ivar_norm = normalized[0], normalized[1]

    # Remove the weight of unphysical pixels in the raw flux.
    unphysical = np.logical_or(ref_flux <= 0, ref_flux > 1.1)
    ivar_norm[unphysical] = 0.0
    np.savez("%s/ref_flux_norm.npz" % DATA_DIR, flux_norm)
    np.savez("%s/ref_ivar_norm.npz" % DATA_DIR, ivar_norm)
Пример #13
0
def test_step(date):
    """Assemble one date's test spectra, with photometric colors appended,
    into a Dataset ready for label inference.

    Loads the normalized spectra, applies a pixel mask, appends the color
    columns (cleaning NaN/inf entries), and saves the dataset's SNRs and
    IDs under COL_DIR.

    Parameters
    ----------
    date: str, observation date used to locate the input files
    """
    wl = np.load("%s/wl_cols.npz" % MODEL_DIR)['arr_0']
    test_ID = np.load("%s/output/%s_ids.npz" % (SPEC_DIR, date))['arr_0']
    print(str(len(test_ID)) + " objects")
    test_flux_temp = np.load("%s/output/%s_norm.npz" %
                             (SPEC_DIR, date))['arr_0']
    test_ivar_temp = np.load("%s/output/%s_norm.npz" %
                             (SPEC_DIR, date))['arr_1']

    # Mask
    mask = np.load("mask.npz")['arr_0']
    # NOTE(review): test_ivar_masked is computed but never used below —
    # the unmasked test_ivar_temp is what gets stacked into test_ivar.
    # Confirm whether the masked array was meant to be used instead.
    test_ivar_masked = apply_mask(wl[0:3626], test_ivar_temp, mask)

    # Append colors
    col = np.load(COL_DIR + "/" + date + "_col.npz")['arr_0']
    col_ivar = np.load(COL_DIR + "/" + date + "_col_ivar.npz")['arr_0']
    # Replace NaN/inf colors with a neutral value and zero weight.
    bad_flux = np.logical_or(np.isnan(col), col == np.inf)
    col[bad_flux] = 1.0
    col_ivar[bad_flux] = 0.0
    bad_ivar = np.logical_or(np.isnan(col_ivar), col_ivar == np.inf)
    col_ivar[bad_ivar] = 0.0
    test_flux = np.hstack((test_flux_temp, col.T))
    test_ivar = np.hstack((test_ivar_temp, col_ivar.T))

    lamost_label = np.load("%s/output/%s_tr_label.npz" %
                           (SPEC_DIR, date))['arr_0']
    apogee_label = np.load("./ref_label.npz")['arr_0']

    # Training block is a 2-object stub; full arrays go in the test block.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)

    #np.savez(COL_DIR + "/%s_test_flux.npz" %date, ds.test_flux)
    #np.savez(COL_DIR + "/%s_test_ivar.npz" %date, ds.test_ivar)
    np.savez(COL_DIR + "/%s_test_snr.npz" % date, ds.test_SNR)
    np.savez(COL_DIR + "/%s_test_id.npz" % date, ds.test_ID)

    ds.set_label_names([
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/Fe]', 'A_k'
    ])
Пример #14
0
def load_dataset(ii):
    """Rebuild the cross-validation Dataset that leaves out group *ii*.

    Parameters
    ----------
    ii: int
        Index of the reference group held out for testing.

    Returns
    -------
    ds: dataset.Dataset
        Dataset with the held-out group in the test block and the
        previously inferred Cannon labels attached.
    """
    # BUG FIX: this was a bare string expression (a no-op), not a print.
    print("loading data")
    groups = np.load("ref_groups.npz")['arr_0']
    ref_label = np.load("%s/ref_label.npz" % direc_ref)['arr_0']
    ref_id = np.load("%s/ref_id.npz" % direc_ref)['arr_0']
    ref_flux = np.load("%s/ref_flux.npz" % direc_ref)['arr_0']
    ref_ivar = np.load("%s/ref_ivar.npz" % direc_ref)['arr_0']
    wl = np.load("%s/wl.npz" % direc_ref)['arr_0']

    print("Leaving out group %s" % ii)
    train_on = groups != ii
    test_on = groups == ii

    tr_label = ref_label[train_on]
    tr_id = ref_id[train_on]
    tr_flux = ref_flux[train_on]
    tr_ivar = ref_ivar[train_on]
    print("Training on %s objects" % len(tr_id))
    test_id = ref_id[test_on]
    test_flux = ref_flux[test_on]
    test_ivar = ref_ivar[test_on]
    print("Testing on %s objects" % len(test_id))

    print("Loading dataset...")
    ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label, test_id,
                         test_flux, test_ivar)

    # BUG FIX: the original referenced an undefined name ``group``; the
    # left-out group index is ``ii``.
    ds.test_label_vals = np.load("./ex%s_cannon_label_vals.npz" %
                                 ii)['arr_0']
    print(ds.test_label_vals.shape)
    ds.set_label_names(
        ['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]', 'AKWISE'])
    fig = ds.diagnostics_SNR()
    plt.savefig("ex%s_SNR.png" % ii)
    fig = ds.diagnostics_ref_labels()
    plt.savefig("ex%s_ref_label_triangle.png" % ii)
    np.savez("ex%s_tr_snr.npz" % ii, ds.tr_SNR)
    return ds
Пример #15
0
def load_all_ref_spectra(ref_id):
    """Assemble masked flux/ivar plus photometric colors for the given
    reference IDs and save the resulting arrays and SNRs.

    Parameters
    ----------
    ref_id: array of ID strings selecting rows of the full training set
    """
    DATA_DIR = "/Users/annaho/Data/LAMOST/Label_Transfer"
    wl = np.load(DATA_DIR + "/../Abundances/wl_cols.npz")['arr_0']
    all_ref_ivar = np.load("%s/tr_ivar.npz" %DATA_DIR)['arr_0']
    all_ref_flux = np.load("%s/tr_flux.npz" %DATA_DIR)['arr_0']
    all_id = np.load("%s/tr_id.npz" %DATA_DIR)['arr_0']
    # IDs were saved as bytes; decode before matching.
    all_id = np.array([val.decode('utf-8') for val in all_id])
    # First matching row per requested ID.
    inds = np.array([np.where(all_id==val)[0][0] for val in ref_id])
    ref_flux = all_ref_flux[inds]
    ref_ivar = all_ref_ivar[inds]

    mask = np.load("%s/../Abundances/mask.npz" %DATA_DIR)['arr_0']
    # Mask is defined on the first 3626 (spectral) pixels only.
    ref_ivar_masked = apply_mask(wl[0:3626], ref_ivar, mask)
    ref_id_col, ref_flux_col, ref_ivar_col = find_colors(
            ref_id, ref_flux, ref_ivar_masked)
    np.savez("ref_id_col.npz", ref_id_col)
    np.savez("ref_flux.npz", ref_flux_col)
    np.savez("ref_ivar.npz", ref_ivar_col)
    # NOTE(review): ``[:,3626]`` selects a single column; elsewhere in this
    # codebase the spectral block is taken with ``[:,0:3626]`` — confirm
    # whether this is a typo.
    ds = dataset.Dataset(
            wl[0:3626], ref_id_col, ref_flux_col[:,3626], ref_ivar_col[:,3626], 
            [], [], [], [])
    np.savez("ref_snr.npz", ds.tr_SNR)
Пример #16
0
def train():
    """Train a 4-label quadratic Cannon model and save its components.

    Loads the normalized training and validation sets, keeps the first
    four labels, renders SNR and reference-label diagnostics, fits the
    model, and writes coeffs/scatters/chisqs/pivots under DATA_DIR.
    """
    # Training and validation inputs, all stored under DATA_DIR.
    wl = np.load("%s/wl.npz" % DATA_DIR)['arr_0']
    tr_id = np.load("%s/tr_id.npz" % DATA_DIR)['arr_0']
    tr_flux = np.load("%s/tr_flux_norm.npz" % DATA_DIR)['arr_0']
    tr_ivar = np.load("%s/tr_ivar_norm.npz" % DATA_DIR)['arr_0']
    tr_label = np.load("%s/tr_label.npz" % DATA_DIR)['arr_0']
    val_id = np.load("%s/val_id.npz" % DATA_DIR)['arr_0']
    val_flux = np.load("%s/val_flux_norm.npz" % DATA_DIR)['arr_0']
    val_ivar = np.load("%s/val_ivar_norm.npz" % DATA_DIR)['arr_0']

    # Only the first four labels are used.
    ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label[:, 0:4],
                         val_id, val_flux, val_ivar)
    ds.set_label_names(["Teff", "logg", "FeH", "aFe"])

    np.savez("%s/tr_SNR.npz" % DATA_DIR, ds.tr_SNR)

    # Diagnostic figures: SNR distribution, then the label triangle.
    ds.diagnostics_SNR()
    plt.savefig("%s/SNR_dist.png" % DATA_DIR)
    plt.close()
    ds.diagnostics_ref_labels()
    plt.savefig("%s/ref_label_triangle.png" % DATA_DIR)
    plt.close()

    # Quadratic (order-2) model.
    cannon = model.CannonModel(2)
    cannon.fit(ds)

    cannon.diagnostics_leading_coeffs(ds)
    plt.savefig("%s/leading_coeffs.png" % DATA_DIR)
    plt.close()

    # Persist the trained model components.
    np.savez("%s/coeffs.npz" % DATA_DIR, cannon.coeffs)
    np.savez("%s/scatters.npz" % DATA_DIR, cannon.scatters)
    np.savez("%s/chisqs.npz" % DATA_DIR, cannon.chisqs)
    np.savez("%s/pivots.npz" % DATA_DIR, cannon.pivots)
Пример #17
0
def train():
    """Train a 5-label quadratic Cannon model on the training set.

    The training set also fills the test block.  Saves the SNRs and the
    fitted model components to the current directory and renders the
    standard diagnostics.
    """
    # Load training set
    wl = np.load("../data/wl.npz")['arr_0']
    tr_id = np.load("tr_id.npz")['arr_0']
    tr_label = np.load("tr_label.npz")['arr_0']
    tr_flux = np.load("tr_flux.npz")['arr_0']
    tr_ivar = np.load("tr_ivar.npz")['arr_0']

    ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label,
                         tr_id, tr_flux, tr_ivar)
    ds.set_label_names(
        ['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]', 'AKWISE'])
    ds.diagnostics_SNR()
    ds.diagnostics_ref_labels()
    np.savez("./tr_snr.npz", ds.tr_SNR)

    # Quadratic (order-2) model; persist each fitted component.
    cannon = model.CannonModel(2)
    cannon.fit(ds)
    np.savez("./coeffs.npz", cannon.coeffs)
    np.savez("./scatters.npz", cannon.scatters)
    np.savez("./chisqs.npz", cannon.chisqs)
    np.savez("./pivots.npz", cannon.pivots)
    cannon.diagnostics_leading_coeffs(ds)
    cannon.diagnostics_leading_coeffs_triangle(ds)
    cannon.diagnostics_plot_chisq(ds)
Пример #18
0
def load_dataset(date):
    """ Load the dataset for a single date 
    
    Parameters
    ----------
    date: the date (string) for which to load the data & dataset

    Returns
    -------
    ds: the dataset object
    """
    LAB_DIR = "/home/annaho/TheCannon/data/lamost"
    WL_DIR = "/home/annaho/TheCannon/code/lamost/mass_age/cn"
    SPEC_DIR = "/home/annaho/TheCannon/code/apogee_lamost/xcalib_4labels/output"

    # Keep only the first 3626 (spectral) pixels — no color columns.
    wl = np.load(WL_DIR + "/wl_cols.npz")['arr_0'][0:3626]

    # Start from an empty Dataset and fill the test block field by field.
    ds = dataset.Dataset(wl, [], [], [], [], [], [], [])
    ds.test_label_vals = np.load("%s/%s_all_cannon_labels.npz" %
                                 (LAB_DIR, date))['arr_0']
    spectra = np.load("%s/%s_norm.npz" % (SPEC_DIR, date))
    ds.test_flux = spectra['arr_0']
    ds.test_ivar = spectra['arr_1']
    ds.test_ID = np.load("%s/%s_ids.npz" % (SPEC_DIR, date))['arr_0']
    return ds
Пример #19
0
def loop(num_sets):
    """Run leave-one-group-out cross-validation over the reference set.

    For each of *num_sets* groups: train a model on the other groups
    (reusing a cached model file when present, via the sibling ``train``)
    and validate on the held-out group (via the sibling ``validate``).

    Parameters
    ----------
    num_sets: int, number of cross-validation groups
    """
    wl = np.load("%s/wl_cols.npz" % SPEC_DIR)['arr_0']
    label_names = np.load("%s/label_names.npz" % DATA_DIR)['arr_0']
    ref_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    #ref_choose = np.load("%s/ref_id_culled.npz" %DATA_DIR)['arr_0']
    #inds = np.array([np.where(ref_id==val)[0][0] for val in ref_choose])
    #ref_id = ref_id[inds]
    ref_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    ref_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']
    ref_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0']
    np.savez("ref_label.npz", ref_label)
    # Pre-computed group assignment (one integer per reference object).
    assignments = np.load("%s/assignments.npz" % DATA_DIR)['arr_0']

    print("looping through %s sets" % num_sets)
    for leave_out in range(0, num_sets):
        print("leaving out %s" % leave_out)
        # Boolean masks: train on all groups except the held-out one.
        training = assignments != leave_out
        test = assignments == leave_out
        tr_id = ref_id[training]
        tr_flux = ref_flux[training]
        tr_ivar = ref_ivar[training]
        # NaN weights would poison the fit; zero them out.
        tr_ivar[np.isnan(tr_ivar)] = 0.0
        tr_label = ref_label[training]
        #np.savez(
        #    "tr_set_%s.npz" %leave_out,
        #    tr_id, tr_flux, tr_ivar, tr_label)
        test_id = ref_id[test]
        test_flux = ref_flux[test]
        test_ivar = ref_ivar[test]
        test_ivar[np.isnan(test_ivar)] = 0.0
        test_label = ref_label[test]
        #np.savez(
        #    "test_set_%s.npz" %leave_out,
        #    test_id, test_flux, test_ivar, test_label)
        ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label, test_id,
                             test_flux, test_ivar)
        ds.set_label_names(label_names)
        fig = ds.diagnostics_SNR()
        plt.savefig("SNRdist_%s.png" % leave_out)
        plt.close()
        #fig = ds.diagnostics_ref_labels()
        #plt.savefig("ref_label_triangle_%s.png" %leave_out)
        #plt.close()
        #np.savez("tr_snr_%s.npz" %leave_out, ds.tr_SNR)

        # Reuse a previously trained model for this fold if it exists.
        modelf = "model_%s.npz" % leave_out
        if glob.glob(modelf):
            print("model already exists")
            coeffs = np.load(modelf)['arr_0']
            scatters = np.load(modelf)['arr_1']
            chisqs = np.load(modelf)['arr_2']
            pivots = np.load(modelf)['arr_3']
            m = model.CannonModel(2)
            m.coeffs = coeffs
            m.scatters = scatters
            m.chisqs = chisqs
            m.pivots = pivots
        else:
            m = train(ds, leave_out)
        # Put the held-out truth labels where validate() can compare them.
        ds.tr_label = test_label
        validate(ds, m, leave_out)
Пример #20
0
# Match the Ho et al. catalog rows to the module-level ``ids`` list and
# build a label array (Teff, logg, [Fe/H], [alpha/M], A_k) per object.
labeldir = "/Users/annaho/Github/TheCannon/data/LAMOST/Label_Transfer"
inputf = pyfits.open("%s/Ho_et_all_catalog_resubmit.fits" %labeldir)
cat = inputf[1].data
inputf.close()

# First matching catalog row per LAMOST ID.
inds = np.array([np.where(cat['LAMOST_ID']==val)[0][0] for val in ids])
ra = cat['RA'][inds]
dec = cat['Dec'][inds]
teff = cat['Teff'][inds]
logg = cat['logg'][inds]
mh = cat['FeH'][inds]
alpham = cat['alphaM'][inds]
# Fixed extinction value for every object.
ak = 0.05*np.ones(len(inds))
lab = np.vstack((teff,logg,mh,alpham,ak))

ds = dataset.Dataset(
        wl, ids, norm_flux, norm_ivar, lab, ids, norm_flux, norm_ivar)

# Labels are stacked (nlabels, nobj); the Dataset wants (nobj, nlabels).
ds.test_label_vals = lab.T

# generate model test spectra
m.infer_spectra(ds)

# NOTE(review): Cinv is currently unused — the weighted-residual line
# below is commented out in favor of the plain difference.
Cinv = ds.test_ivar / (1 + ds.test_ivar*m.scatters**2)
#res = Cinv*(ds.test_flux - m.model_spectra)**2
res = (ds.test_flux - m.model_spectra)

# get height above the plane
c = SkyCoord(ra, dec, unit='deg')
lat = np.abs(c.icrs.galactic.b)
for ii in range(0, len(ids)):
Пример #21
0
def test_step(date):
    """Infer Cannon labels for one date's test spectra.

    Fits every test spectrum from seven starting guesses (four
    temperature/gravity/metallicity corners plus high-alpha, low-alpha,
    and low-[Fe/H] points) and keeps the lowest-chi-squared fit per
    object.  Results are saved under ``output/``.

    Parameters
    ----------
    date: str
        Observation date string used to locate the input files.
    """
    wl = np.load("wl.npz")['arr_0']
    test_ID = np.load("output/%s_ids.npz" % date)['arr_0']
    print(str(len(test_ID)) + " objects")
    test_flux = np.load("output/%s_norm.npz" % date)['arr_0']
    test_ivar = np.load("output/%s_norm.npz" % date)['arr_1']

    nlabels = 4
    nobj = len(test_ID)

    lamost_label = np.load("output/%s_tr_label.npz" % date)['arr_0']

    # Training block is a 2-object stub; full arrays go in the test block.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])

    # Restore a trained quadratic (order-2) Cannon model.
    m = model.CannonModel(2)
    m.coeffs = np.load("./coeffs.npz")['arr_0']
    m.scatters = np.load("./scatters.npz")['arr_0']
    m.chisqs = np.load("./chisqs.npz")['arr_0']
    m.pivots = np.load("./pivots.npz")['arr_0']

    # Seven starting guesses in physical units, converted to
    # pivot-relative values below.
    nguesses = 7
    starting_guesses = np.zeros((nguesses, nlabels))
    hiT_hiG_hiM = np.array(
        [5.15273730e+03, 3.71762228e+00, 3.16861898e-01, 2.46907920e-02])
    hiT_hiG_loM = np.array(
        [5.16350098e+03, 3.45917511e+00, -9.24426436e-01, 2.49296919e-01])
    loT_loG_hiM = np.array(
        [4.04936841e+03, 1.47109437e+00, 2.07210138e-01, 1.49733415e-02])
    loT_loG_loM = np.array(
        [4.00651318e+03, 8.35013509e-01, -8.98257852e-01, 7.65705928e-02])
    high_alpha = np.array([[4750, 2.6, -0.096, 0.25]])
    low_alpha = np.array([[4840, 2.67, -0.045, 0.049]])
    low_feh = np.array([[4500, 1.45, -1.54, 0.24]])
    starting_guesses[0, :] = hiT_hiG_hiM - m.pivots
    starting_guesses[1, :] = hiT_hiG_loM - m.pivots
    starting_guesses[2, :] = loT_loG_loM - m.pivots
    starting_guesses[3, :] = loT_loG_hiM - m.pivots
    starting_guesses[4, :] = high_alpha - m.pivots
    starting_guesses[5, :] = low_alpha - m.pivots
    starting_guesses[6, :] = low_feh - m.pivots

    labels = np.zeros((nguesses, nobj, nlabels))  # 4,10955,4
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    # Fit all objects once per starting guess.
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c

    np.savez("output/%s_cannon_label_guesses.npz" % date, labels)
    # BUG FIX: the chisq file previously saved ``labels`` a second time;
    # it now saves the per-guess chi-squared values.
    np.savez("output/%s_cannon_chisq_guesses.npz" % date, chisq)

    # Per object, keep the guess that produced the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[val, jj, :]
        best_errs[jj, :] = errs[val, jj, :]

    np.savez("output/%s_all_cannon_labels.npz" % date, best_labels)
    np.savez("output/%s_cannon_label_chisq.npz" % date, best_chisq)
    np.savez("output/%s_cannon_label_errs.npz" % date, best_errs)

    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" %
                                 date)
    # The 1-to-1 diagnostic only covers the first three labels.
    ds.test_label_vals = best_labels[:, 0:3]
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    ds.diagnostics_1to1(figname="%s_1to1_test_label" % date)
Пример #22
0
def loop(num_sets):
    """Run leave-one-set-out cross-validation over ``num_sets`` folds.

    Loads the column-spectra reference set, restricts it to the chosen
    reference IDs, then for each fold: writes the train/test split to
    disk, builds a Dataset, saves SNR/label diagnostics, trains (or
    reloads) a Cannon model, and validates it on the held-out fold.
    """
    wl = np.load("%s/wl_cols.npz" % SPEC_DIR)['arr_0']
    label_names = [
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/M]', 'A_k'
    ]
    ref_id = np.load("%s/ref_id_col.npz" % SPEC_DIR)['arr_0']
    ref_choose = np.load("%s/ref_id.npz" % DATA_DIR)['arr_0']
    # Position of each chosen ID within the full column-spectra ID list.
    order = np.array([np.where(ref_id == name)[0][0] for name in ref_choose])
    ref_id = ref_id[order]
    ref_flux = np.load("%s/ref_flux_col.npz" % SPEC_DIR)['arr_0'][order]
    ref_ivar = np.load("%s/ref_ivar_col.npz" % SPEC_DIR)['arr_0'][order]
    np.savez("ref_id.npz", ref_id)
    np.savez("ref_flux.npz", ref_flux)
    np.savez("ref_ivar.npz", ref_ivar)
    # Dataset built solely to compute the reference-set SNR.
    ds = dataset.Dataset(wl[0:3626], ref_id, ref_flux[:, 0:3626],
                         ref_ivar[:, 0:3626], [], [], [], [])
    np.savez("ref_snr.npz", ds.tr_SNR)
    ref_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0'][order]
    #ref_label = np.load("%s/xval_cannon_label_vals.npz" %TR_LAB_DIR)['arr_0']
    np.savez("ref_label.npz", ref_label)
    assignments = np.load("%s/../assignments.npz" % DATA_DIR)['arr_0']

    print("looping through %s sets" % num_sets)
    for leave_out in range(num_sets):
        print("leaving out %s" % leave_out)
        is_train = assignments != leave_out
        is_test = assignments == leave_out

        tr_id = ref_id[is_train]
        tr_flux = ref_flux[is_train]
        tr_ivar = ref_ivar[is_train]
        tr_ivar[np.isnan(tr_ivar)] = 0.0  # NaN weights -> zero weight
        tr_label = ref_label[is_train]
        np.savez("tr_set_%s.npz" % leave_out, tr_id, tr_flux, tr_ivar,
                 tr_label)

        test_id = ref_id[is_test]
        test_flux = ref_flux[is_test]
        test_ivar = ref_ivar[is_test]
        test_ivar[np.isnan(test_ivar)] = 0.0
        test_label = ref_label[is_test]
        np.savez("test_set_%s.npz" % leave_out, test_id, test_flux,
                 test_ivar, test_label)

        ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label,
                             test_id, test_flux, test_ivar)
        print(ds.wl)
        ds.set_label_names(label_names)
        # Diagnostic plots for this fold.
        ds.diagnostics_SNR()
        plt.savefig("SNRdist_%s.png" % leave_out)
        plt.close()
        ds.diagnostics_ref_labels()
        plt.savefig("ref_label_triangle_%s.png" % leave_out)
        plt.close()
        np.savez("tr_snr_%s.npz" % leave_out, ds.tr_SNR)

        modelf = "model_%s.npz" % leave_out
        if glob.glob(modelf):
            # Reuse a previously trained model for this fold.
            print("model already exists")
            saved = np.load(modelf)
            m = model.CannonModel(2)
            m.coeffs = saved['arr_0']
            m.scatters = saved['arr_1']
            m.chisqs = saved['arr_2']
            m.pivots = saved['arr_3']
        else:
            m = train(ds, leave_out)
        # Swap the held-out truth labels in before validation.
        ds.tr_label = test_label
        validate(ds, m, leave_out)
Пример #23
0
def test():
    """Run the Cannon test step from several starting guesses.

    Loads normalized training/test spectra and a trained second-order
    model from DATA_DIR, fits each test object starting from 7 randomly
    drawn reference-label vectors, and keeps the lowest-chi-squared
    solution per object. Intermediate and final arrays are written to
    DATA_DIR.
    """
    # Spectra and reference labels.
    wl = np.load("%s/wl.npz" % DATA_DIR)['arr_0']
    tr_id = np.load("%s/tr_id.npz" % DATA_DIR)['arr_0']
    tr_flux = np.load("%s/tr_flux_norm.npz" % DATA_DIR)['arr_0']
    tr_ivar = np.load("%s/tr_ivar_norm.npz" % DATA_DIR)['arr_0']
    test_id = np.load("%s/test_id.npz" % DATA_DIR)['arr_0']
    test_flux = np.load("%s/test_flux_norm.npz" % DATA_DIR)['arr_0']
    test_ivar = np.load("%s/test_ivar_norm.npz" % DATA_DIR)['arr_0']
    tr_label = np.load("%s/tr_label.npz" % DATA_DIR)['arr_0']

    # Trained model arrays.
    coeffs = np.load("%s/coeffs.npz" % DATA_DIR)['arr_0']
    scatters = np.load("%s/scatters.npz" % DATA_DIR)['arr_0']
    chisqs = np.load("%s/chisqs.npz" % DATA_DIR)['arr_0']
    pivots = np.load("%s/pivots.npz" % DATA_DIR)['arr_0']

    # Only the first four labels are used.
    ds = dataset.Dataset(
            wl, tr_id, tr_flux, tr_ivar, tr_label[:, 0:4],
            test_id, test_flux, test_ivar)

    np.savez("%s/test_SNR.npz" % DATA_DIR, ds.test_SNR)

    ds.set_label_names(["Teff", "logg", "FeH", "aFe"])
    md = model.CannonModel(2)  # quadratic label vector
    md.coeffs = coeffs
    md.scatters = scatters
    md.chisqs = chisqs
    md.pivots = pivots
    md.diagnostics_leading_coeffs(ds)

    nguesses = 7
    nobj = len(ds.test_ID)
    nlabels = ds.tr_label.shape[1]
    # Starting guesses: labels of 7 random objects, offset by the pivots.
    picked = np.random.randint(0, nobj, size=nguesses)
    starting_guesses = ds.tr_label[picked] - md.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    # Blank out tr_label before the fit.
    ds.tr_label = np.zeros((nobj, nlabels))

    for idx, guess in enumerate(starting_guesses):
        fit_labels, fit_chisq, fit_errs = test_step_iteration(ds, md, guess)
        labels[idx, :] = fit_labels
        chisq[idx, :] = fit_chisq
        errs[idx, :] = fit_errs

    np.savez("%s/labels_all_starting_vals.npz" % DATA_DIR, labels)
    np.savez("%s/chisq_all_starting_vals.npz" % DATA_DIR, chisq)
    np.savez("%s/errs_all_starting_vals.npz" % DATA_DIR, errs)

    # For each object keep the guess that reached the smallest chisq.
    best_guess = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for obj, guess_idx in enumerate(best_guess):
        best_labels[obj, :] = labels[guess_idx, obj, :]
        best_errs[obj, :] = errs[guess_idx, obj, :]

    np.savez("%s/test_cannon_labels.npz" % DATA_DIR, best_labels)
    np.savez("%s/test_errs.npz" % DATA_DIR, best_errs)
    np.savez("%s/test_chisq.npz" % DATA_DIR, best_chisq)

    ds.test_label_vals = best_labels
Пример #24
0
import numpy as np
import pickle
import glob
from matplotlib import rc
from lamost import load_spectra, load_labels
from TheCannon import continuum_normalization
from TheCannon import dataset
from TheCannon import model

rc('text', usetex=True)
rc('font', family='serif')

# Unpack the raw test spectra from the archive.
with np.load("test_data_raw.npz") as raw:
    test_IDs, wl, test_flux, test_ivar = (
        raw['arr_0'], raw['arr_1'], raw['arr_2'], raw['arr_3'])

# Training slots are placeholders (first 10 objects, dummy label list);
# only the test side of the dataset is used below.
data = dataset.Dataset(
        wl, test_IDs[0:10], test_flux[0:10, :], test_ivar[0:10, :], [1],
        test_IDs, test_flux, test_ivar)

data.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])

# Continuum-normalize with a Gaussian smoothing kernel (L=50),
# then save the normalized test spectra.
data.continuum_normalize_gaussian_smoothing(L=50)
np.savez("./test_norm", test_IDs, wl, data.test_flux, data.test_ivar)
Пример #25
0
from TheCannon import dataset
from TheCannon import model

# (1) PREPARE DATA
# NOTE(review): `apogee` is used below but never imported in this example;
# it needs `from TheCannon import apogee` (or equivalent) — confirm.

tr_ID, wl, tr_flux, tr_ivar = apogee.load_spectra("example_DR10/Data")
tr_label = apogee.load_labels("example_DR10/reference_labels.csv")

# doing a 1-to-1 test for simplicity: the training set doubles as the
# test set, so inferred labels can be compared directly to the inputs.
# (The original re-read reference_labels.csv into tr_label here; that
# duplicate load was redundant and has been removed.)
test_ID = tr_ID
test_flux = tr_flux
test_ivar = tr_ivar

# choose labels and make a new array
ds = dataset.Dataset(wl, tr_ID, tr_flux, tr_ivar, tr_label, test_ID, test_flux,
                     test_ivar)

# set LaTeX label names for making diagnostic plots
ds.set_label_names(['T_{eff}', '\log g', '[Fe/H]'])

# Plot SNR distributions and triangle plot of reference labels
fig = ds.diagnostics_SNR()
fig = ds.diagnostics_ref_labels()

# (2) IDENTIFY CONTINUUM PIXELS
# Pseudo-normalize the training spectra with a running 90th-percentile
# quantile filter over a 50-pixel window.
pseudo_tr_flux, pseudo_tr_ivar = ds.continuum_normalize_training_q(
    q=0.90, delta_lambda=50)

# Pixel ranges within which continuum pixels are searched for.
ds.ranges = [[371, 3192], [3697, 5500], [5500, 5997], [6461, 8255]]
contmask = ds.make_contmask(pseudo_tr_flux, pseudo_tr_ivar, frac=0.07)
Пример #26
0
# Load cross-validation inputs and outputs for comparison.
ref_ivar = np.load("%s/ref_ivar.npz" %DIR)['arr_0']
ref_label = np.load("%s/ref_label.npz" %DIR)['arr_0']
cannon_label = np.load("%s/xval_cannon_label_vals.npz" %DIR)['arr_0']
snr = np.load("%s/ref_snr.npz" %DIR)['arr_0']
rv = np.load("%s/ref_rvs.npz" %DIR)['arr_0']
chisq = np.load("%s/xval_cannon_label_chisq.npz" %DIR)['arr_0']
coeffs = np.load("%s/coeffs.npz" %DIR)['arr_0']
pivots = np.load("%s/pivots.npz" %DIR)['arr_0']
scatters = np.load("%s/scatters.npz" %DIR)['arr_0']

# Create model spectra
m = model.CannonModel(2)  # second-order (quadratic) label vector
m.coeffs = coeffs
m.pivots = pivots
m.scatters = scatters
# NOTE(review): `ref_flux` is not loaded anywhere above — presumably it
# should come from a "%s/ref_flux.npz" % DIR read; confirm before running.
ds = dataset.Dataset([], [], [], [], [], [], ref_flux, ref_ivar)
ds.test_label_vals = cannon_label
m.infer_spectra(ds)

# Plot residuals of stars with large diff and large negative rvel
# (Neg rvel seems worse to me, in the scatterplot...)
# Difference between reference and Cannon values of label column 5.
diff = ref_label[:,5] - cannon_label[:,5]
# NOTE(review): only the LAST of these three assignments takes effect;
# the first two look like leftover experiments with other cuts — confirm.
choose_bad = np.logical_and(np.abs(diff) < 0.01, np.abs(rv) < 10)
choose_bad = np.logical_and(diff < -0.05, rv < -50)
choose_bad = np.logical_and(diff > 0.05, rv > 50)
choose_snr = snr > 70
#choose_chisq = np.logical_and(chisq > 1000, chisq < 10000)
#choose_quality = np.logical_and(choose_snr, choose_chisq)
#choose = np.logical_and(choose_bad, choose_quality)
choose = np.logical_and(choose_bad, choose_snr)
Пример #27
0
# IDs of stars with PS1 colors (column 1 of the comma-separated catalog).
tr_ID = np.loadtxt("example_PS1/ps_colors_ts_overlap.txt", 
                   usecols=(1,), dtype='str', delimiter=',')

dir_dat = "example_LAMOST/Data_All"
tr_IDs, wl, tr_flux, tr_ivar = load_spectra(dir_dat, tr_ID)

# APOGEE DR12 labels matched to the loaded spectra IDs.
label_file = "apogee_dr12_labels.csv"
all_labels = load_labels(label_file, tr_IDs)
teff = all_labels[:,0]
logg = all_labels[:,1]
mh = all_labels[:,2]
alpha = all_labels[:,3]
tr_label = np.vstack((teff, logg, mh, alpha)).T

# Train and test on the same spectra (1-to-1 setup).
data = dataset.Dataset(
        wl, tr_IDs, tr_flux, tr_ivar, tr_label, 
        tr_IDs, tr_flux, tr_ivar)
data.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
data.continuum_normalize_gaussian_smoothing(L=50)

# get colors

# PS1 colors and their errors: catalog columns alternate value/error.
colors = np.loadtxt("example_PS1/ps_colors_ts_overlap.txt", 
                    usecols=(2,4,6,8), dtype='float', delimiter=',')
errors = np.loadtxt("example_PS1/ps_colors_ts_overlap.txt", 
                    usecols=(3,5,7,9), dtype='float', delimiter=',') 
ivars = 1./ errors**2
# NOTE(review): sorting by argsort(tr_ID) assumes load_spectra returned
# spectra in sorted-ID order — confirm before trusting this alignment.
colors = colors[np.argsort(tr_ID)]
ivars = ivars[np.argsort(tr_ID)]
# NOTE(review): the 1e15 rescaling of the color inverse variances looks
# like an ad-hoc weight boost — confirm intended units/weighting.
ivars = ivars * 1e15
Пример #28
0
def fitting_ve(name):
    """Fit one apStar FITS file and save velocity/parameter products.

    Opens the FITS file at path ``name``, masks bad pixels, continuum-
    normalizes the visit spectra, infers Cannon labels (replicated from
    the combined spectrum), fits per-visit mixing parameters, derives a
    velocity estimate ``ve``, and writes all products to a new FITS file.

    Parameters
    ----------
    name : str
        Full path to the input apStar FITS file.

    Returns
    -------
    em : int
        1 on success, 0 if reading or saving failed (also set to 0 when
        fewer than 3 spectra are present — see note below).

    NOTE(review): relies on module-level names defined elsewhere in the
    file (wl, model, ts, get_pixmask, MJD2BJD, os, fits, Table, SkyCoord).
    """

    image_path = name
    # NOTE(review): `i`, `N`, and `keep` are not defined in this function;
    # this branch would raise NameError if taken — it appears copied from
    # an enclosing loop. Confirm against the original script.
    if not os.path.exists(image_path):
        print("{}/{} could not be found: {}".format(i + 1, N, image_path))
        keep[i] = False

    # We only store flux,ivar,inf_flux,parameters,parameters_new,parameters_sim,ve(n*3)(include ve, ve_new,ve_sim)
    try:
        image = fits.open(image_path, ignore_missing_end=True)
        dat = Table.read(image_path)

        flux = image[1].data
        flux_err = image[2].data

        # Guarantee 2-D arrays even for single-visit files.
        flux = np.atleast_2d(flux)
        flux_err = np.atleast_2d(flux_err)


    except IOError:

        print("opts. This one fail")
        em =0

    else:

        em =1

        badpix = get_pixmask(flux, flux_err)
        ivar = 1.0 / flux_err ** 2
        error = flux_err
        # badpix is a array and the length is 8575
        flux = np.array(flux, dtype=np.float64)
        ivar = np.array(ivar, dtype=np.float64)

        # Patch bad pixels: median flux value, zero weight.
        flux[badpix] = np.median(flux)
        ivar[badpix] = 0.0

        flux = np.array(flux)
        ivar = np.array(ivar)

        # normalize flux:
        # value

        tr_ID = image_path

        # Placeholder labels; only the spectra matter for normalization.
        test_labels_all_i = np.array([5000, 1, 1])

        ds = dataset.Dataset(wl, tr_ID, flux, ivar,
                             test_labels_all_i, tr_ID, flux, ivar)

        ds.ranges = [[371, 3192], [3697, 5997], [6461, 8255]]

        # set sudo-continuous spectrum
        pseudo_tr_flux, pseudo_tr_ivar = ds.continuum_normalize_training_q \
            (q=0.90, delta_lambda=50)

        # set mask
        contmask = ds.make_contmask(pseudo_tr_flux, pseudo_tr_ivar, frac=0.07)

        # get continuous mask

        ds.set_continuum(contmask)

        # fit the normalized-spectrum in the continuous region

        cont = ds.fit_continuum(3, "sinusoid")

        # Obtain the normalized flux
        norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar = \
            ds.continuum_normalize(cont)

        norm_tr_flux = np.atleast_2d(norm_tr_flux)

        # NOTE(review): em is flagged 0 here but execution continues and
        # the output file is still written below — confirm this is intended.
        if len(norm_tr_flux[:,0])<3:
            em=0
        else:
            nothing=1

        # infer labels


        # inf_labels = model.fit(norm_tr_flux, norm_tr_ivar)


        # Use inferred labels from the combined spectra:


        inf_labels = model.fit(norm_tr_flux, norm_tr_ivar)
        # only use the inf labels from the combined spectra

        com = len(inf_labels[:, 0])

        # Row 0 is the combined spectrum; replicate its labels for every
        # visit row so all visits share one set of labels.
        inf_labels_com = inf_labels[0, :]

        inf_labels = []
        for z in range(0, com):
            inf_labels.append(inf_labels_com)

        inf_labels = np.array(inf_labels)

        # Model spectrum from the (replicated) labels, then fit the
        # per-visit mixing parameters against the normalized flux.
        v = model.vectorizer.get_label_vector(inf_labels)
        inf_flux = np.dot(v, model.theta.T)
        opt_flux, parameters = model.fitting_spectrum_parameters_single \
            (norm_tr_flux, norm_tr_ivar, inf_flux)


        # calculate chi-squared!

        chi_inf = (norm_tr_flux-inf_flux)**2*norm_tr_ivar
        chi_inf = np.sum(chi_inf,axis=1)

        chi_mix = (norm_tr_flux-opt_flux)**2*norm_tr_ivar
        chi_mix = np.sum(chi_mix,axis=1)



        # NOTE(review): 4144.68 looks like a unit-conversion constant for
        # the velocity estimate — confirm its derivation and units.
        ve = (parameters[:, 2] - parameters[:, 0]) / (parameters[:, 0] + parameters[:, 1] + parameters[:, 2]) * 4144.68

        ve_un = model.uncertainty

        # old
        a0 = parameters
        a1 = ve
        a2 = ve_un

        # covariance matrix for abc
        a3 = model.un_cov

        # spectra

        a4 = norm_tr_flux
        a5 = norm_tr_ivar
        a6 = inf_flux
        a7 = opt_flux

        # inf_labels are from the
        a8 = inf_labels

        a9 = chi_inf

        a10 = chi_mix

        # VHELIO
        a11 = np.array(dat[0]["VHELIO"])

        # Fiber

        a12 = np.array(dat[0]["FIBER"])

        # Files

        # BJD

        RA = image[0].header["RA"]

        DEC = image[0].header["DEC"]

        SNR = image[0].header["SNR"]

        MJD = dat[0]["MJD"]

        c = SkyCoord(RA, DEC, frame='icrs', unit='deg')

        BJD = MJD2BJD(MJD, c)

        a13 = np.array(BJD)

        # calculate chi-squared:


        try:
            # save them

            # pay attention to the fits file saving

            # Output path mirrors the input filename under the results dir.
            path_fits_i = image_path.replace("/Volumes/Data_2TB/Data/DR13_rc/apStar-r6-",
                                             "/Users/caojunzhi/Desktop/Data/dr13_red_clump/")

            print("saving files" + path_fits_i)

            hdu = fits.PrimaryHDU(data=a0)
            hdu.header[
                'COMMENT'] = "Simple orange juice"

            # add header info

            hdu.header['SNR'] = SNR
            hdu.header['RA'] = RA
            hdu.header['DEC'] = DEC

            # NOTE(review): `clobber` is deprecated in recent astropy;
            # `overwrite=True` is the modern spelling.
            hdu.writeto(path_fits_i, clobber=True)

            # Append each product as an extra HDU, in fixed order.
            ts.append(path_fits_i, a1)
            ts.append(path_fits_i, a2)
            ts.append(path_fits_i, a3)
            ts.append(path_fits_i, a4)
            ts.append(path_fits_i, a5)
            ts.append(path_fits_i, a6)
            ts.append(path_fits_i, a7)
            ts.append(path_fits_i, a8)

            ts.append(path_fits_i, a9)
            ts.append(path_fits_i, a10)
            ts.append(path_fits_i, a11)
            ts.append(path_fits_i, a12)
            ts.append(path_fits_i, a13)

        except OSError:
            print("fail")
            em=0

    return em
Пример #29
0
def run(date):
    """Apply a pre-trained quadratic Cannon model to one night of LAMOST
    test spectra and write labels, errors, and diagnostics to disk.

    Parameters
    ----------
    date : str
        Observation-date tag used to locate the test-object list and to
        name every output file.
    """
    # Training step has already been completed. Load the model,
    spectral_model = model.CannonModel(2) # 2 = quadratic model
    spectral_model.coeffs = np.load("./coeffs.npz")['arr_0']
    spectral_model.scatters = np.load("./scatter.npz")['arr_0']
    spectral_model.chisqs = np.load("./chisqs.npz")['arr_0']
    spectral_model.pivots = np.load("./pivots.npz")['arr_0']

    # Load the test set. load_spectra also returns the wavelength grid,
    # so the separate wl.npz read that used to precede this was dead code
    # (its result was immediately overwritten) and has been removed.
    test_ID = np.loadtxt("test_obj/%s_test_obj.txt" %date, dtype=str)
    print("%s test objects" %len(test_ID))
    dir_dat = "/home/share/LAMOST/DR2/DR2_release"
    test_IDs, wl, test_flux, test_ivar = load_spectra(dir_dat, test_ID)
    np.savez("output/%s_ids" %date, test_IDs)
    #np.savez("./%s_data_raw" %date, test_flux, test_ivar)

    # Load the corresponding LAMOST labels, row-matched to test_IDs.
    labels = np.load("lamost_labels/lamost_labels_%s.npz" %date)['arr_0']
    inds = np.array([np.where(labels[:,0]==a)[0][0] for a in test_IDs]) 
    nstars = len(test_IDs)
    # Column 4 stays zero (no LAMOST alpha/Fe value is available).
    lamost_labels = np.zeros((nstars,4))
    lamost_labels[:,0:3] = labels[inds,:][:,1:].astype(float) 
    np.savez("output/%s_lamost_label" %date, lamost_labels)
    
    # Set dataset object (training slots mirror the test set).
    data = dataset.Dataset(
            wl, test_IDs, test_flux, test_ivar, 
            lamost_labels, test_IDs, test_flux, test_ivar)

    # set the headers for plotting
    data.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
    
    # Plot SNR distribution
    data.diagnostics_SNR(figname="%s_SNRdist.png" %date)
    np.savez("output/%s_SNR" %date, data.test_SNR)

    # Continuum normalize, reusing a cached result when one exists.
    filename = "output/%s_norm.npz" %date
    if glob.glob(filename):
        print("already cont normalized")
        data.test_flux = np.load(filename)['arr_0']
        data.test_ivar = np.load(filename)['arr_1']
    else:
        # Shrink the training side to a single spectrum — presumably to
        # avoid normalizing the unused mirrored training set; confirm.
        data.tr_ID = data.tr_ID[0]
        data.tr_flux = data.tr_flux[0,:]
        data.tr_ivar = data.tr_ivar[0,:]
        data.continuum_normalize_gaussian_smoothing(L=50)
        np.savez("output/%s_norm" %date, data.test_flux, data.test_ivar)

    # Infer labels 
    errs, chisq = spectral_model.infer_labels(data)
    np.savez("output/%s_cannon_labels.npz" %date, data.test_label_vals)
    np.savez("./%s_formal_errors.npz" %date, errs)
    np.savez("./%s_chisq.npz" %date, chisq)

    # Make plots
    data.test_label_vals = data.test_label_vals[:,0:3] # so it doesn't try alpha
    data.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    data.diagnostics_1to1(figname="%s_1to1_label" %date)
Пример #30
0
def test_step():
    """Run the Cannon test step on the excised (held-out) objects.

    Loads the excised spectra and a trained model, fits each object from
    7 starting guesses drawn from the APOGEE reference labels, and saves
    the lowest-chi-squared labels, errors, and chisq per object.
    """
    # Bug fix: np.load on an .npz returns an archive, not an array — the
    # wavelength grid must be extracted with ['arr_0'], as every other
    # wl_cols.npz read in this file does.
    wl = np.load(SPEC_DIR + "/wl_cols.npz")['arr_0']
    ref_id_all = np.load(SPEC_DIR + "/ref_id_col.npz")['arr_0']
    excised = np.load(SPEC_DIR + "/excised_obj/excised_ids.npz")['arr_0']
    # Row index of each excised ID within the full reference set.
    inds = np.array([np.where(ref_id_all == val)[0][0] for val in excised])
    test_ID = ref_id_all[inds]
    print(str(len(test_ID)) + " objects")
    test_flux = np.load("%s/ref_flux_col.npz" % (SPEC_DIR))['arr_0'][inds]
    test_ivar = np.load("%s/ref_ivar_col.npz" % (SPEC_DIR))['arr_0'][inds]

    apogee_label = np.load("%s/ref_label.npz" % (SPEC_DIR))['arr_0'][inds]
    #np.savez("excised_label.npz", apogee_label)

    # Training slots get two dummy spectra; only the test side is used.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         apogee_label, test_ID, test_flux, test_ivar)
    ds.set_label_names([
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/Fe]', 'A_k'
    ])
    np.savez("excised_snr.npz", ds.test_SNR)
    print("DONE")

    # Rehydrate the trained quadratic model.
    m = model.CannonModel(2)
    m.coeffs = np.load(MODEL_DIR + "/coeffs.npz")['arr_0']
    m.scatters = np.load(MODEL_DIR + "/scatters.npz")['arr_0']
    m.chisqs = np.load(MODEL_DIR + "/chisqs.npz")['arr_0']
    m.pivots = np.load(MODEL_DIR + "/pivots.npz")['arr_0']

    nlabels = len(m.pivots)
    nobj = len(test_ID)

    # Starting guesses: labels of 7 random objects, offset by the pivots.
    nguesses = 7
    choose = np.random.randint(0, nobj, size=nguesses)
    print(apogee_label.shape)
    print(choose.shape)
    print(m.pivots.shape)
    starting_guesses = apogee_label[choose] - m.pivots

    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c

    # For each object keep the guess that converged to the lowest chisq.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]

    np.savez("excised_all_cannon_labels.npz", best_labels)
    np.savez("excised_cannon_label_chisq.npz", best_chisq)
    np.savez("excised_cannon_label_errs.npz", best_errs)

    ds.test_label_vals = best_labels
    ds.diagnostics_1to1(figname="excised_1to1_test_label")