Пример #1
0
def test_zestimation(nspec: int):
    """
    Run the quasar-redshift inference on the ``nspec``-th test spectrum.

    The spectrum file name is taken from the module-level ``filenames``
    sequence; when the file is not on disk, plate / MJD / fiber id are parsed
    from the name and the spectrum is fetched with ``retrieve_raw_spec``.

    :param nspec: index into the module-level ``filenames`` and ``z_qsos``.
    :return: ``(z_map, z_true)`` -- the ``z_map`` attribute of the model after
        inference and the reference value ``z_qsos[nspec]``.
    :raises IndexError: if the spectrum is missing on disk and its file name
        does not follow the ``spec-<plate>-<mjd>-<fiber>.fits`` pattern.
    """
    filename = filenames[nspec]

    if not os.path.exists(filename):
        # The dot before "fits" is escaped so it only matches a literal "."
        # instead of any character.
        plate, mjd, fiber_id = re.findall(
            r"spec-([0-9]+)-([0-9]+)-([0-9]+)\.fits",
            filename,
        )[0]
        retrieve_raw_spec(int(plate), int(mjd), int(fiber_id))

    params = ZParameters()
    z_qso_samples = ZSamples(params)

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)

    z_qso_gp = ZGPMAT(
        params,
        z_qso_samples,
        learned_file=
        "data/dr12q/processed/learned_zqso_only_model_outdata_normout_dr9q_minus_concordance_norm_1176-1256.mat",
    )

    # Time only the inference call: the clock is stopped before any printing.
    tic = time.time()
    z_qso_gp.inference_z_qso(wavelengths, flux, noise_variance, pixel_mask)
    toc = time.time()

    print("Z True : {:.3g}".format(z_qsos[nspec]))
    print("spent {} mins; {} seconds".format((toc - tic) // 60,
                                             (toc - tic) % 60))

    return z_qso_gp.z_map, z_qsos[nspec]
Пример #2
0
def test_read_spec():
    """
    Check basic properties of the four arrays returned by ``read_spec``.

    The spectrum file is fetched with ``retrieve_raw_spec`` when it is not
    already present in the working directory.
    """
    # Single source of truth for the file name used by both the existence
    # check and the reader.
    filename = "spec-7340-56825-0576.fits"

    if not os.path.exists(filename):
        retrieve_raw_spec(7340, 56825, 576)  # an arbitrary spectrum

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)

    assert min(wavelengths) > 1216
    assert len(flux) == len(noise_variance)
    # isinstance is the idiomatic type check; the mask entries must be NumPy
    # booleans.
    assert isinstance(pixel_mask[0], np.bool_)
def download_ho_2020_spectrum(num_quasars: int = 5):
    """
    Fetch the first ``num_quasars`` spectra of the Ho-Bird-Garnett (2020)
    catalogue.

    Every selected file name is printed; a spectrum is only requested through
    ``retrieve_raw_spec`` when its file is not found on disk.

    :param num_quasars: how many entries to take from the start of the
        hard-coded tables (at most 100, checked with an ``assert``).
    """
    assert num_quasars <= 100

    # Hard-coded identifiers of the first 100 catalogue entries.
    catalogue_plates = np.array([
        6173, 6177, 4354, 6498, 6177, 4216, 6182, 4296, 7134, 6877, 6177, 4277,
        4415, 4216, 4216, 7167, 6177, 4354, 7144, 6177, 7147, 7144, 6511, 6511,
        6151, 4216, 4535, 6182, 7034, 6177, 6151, 6498, 7147, 6182, 4354, 6177,
        6177, 4354, 6879, 6151, 7144, 4354, 4277, 6879, 6498, 6182, 6879, 4535,
        7167, 6879, 4535, 4216, 4216, 4415, 6182, 6511, 6207, 4216, 6177, 4296,
        4277, 7034, 4277, 6152, 6172, 7033, 4216, 4277, 6498, 7033, 4415, 4535,
        6877, 6170, 4296, 6498, 6513, 6177, 4535, 6151, 4216, 4296, 4296, 7147,
        4535, 4296, 7167, 6172, 4535, 6172, 4216, 7147, 4296, 7167, 4216, 7147,
        4296, 6177, 6879, 7034
    ])
    catalogue_mjds = np.array([
        56238, 56268, 55810, 56565, 56268, 55477, 56190, 55499, 56566, 56544,
        56268, 55506, 55831, 55477, 55477, 56604, 56268, 55810, 56564, 56268,
        56574, 56564, 56540, 56540, 56265, 55477, 55860, 56190, 56564, 56268,
        56265, 56565, 56574, 56190, 55810, 56268, 56268, 55810, 56539, 56265,
        56564, 55810, 55506, 56539, 56565, 56190, 56539, 55860, 56604, 56539,
        55860, 55477, 55477, 55831, 56190, 56540, 56239, 55477, 56268, 55499,
        55506, 56564, 55506, 56164, 56269, 56565, 55477, 55506, 56565, 56565,
        55831, 55860, 56544, 56240, 55499, 56565, 56543, 56268, 55860, 56265,
        55477, 55499, 55499, 56574, 55860, 55499, 56604, 56269, 55860, 56269,
        55477, 56574, 55499, 56604, 55477, 56574, 55499, 56268, 56539, 56564
    ])
    catalogue_fibers = np.array([
        528, 595, 646, 177, 608, 312, 652, 364, 594, 564, 648, 896, 554, 302,
        292, 290, 384, 686, 752, 640, 860, 266, 92, 86, 88, 732, 680, 342, 358,
        386, 936, 844, 171, 338, 702, 584, 393, 709, 439, 78, 221, 700, 872,
        580, 838, 326, 436, 302, 259, 427, 361, 718, 276, 466, 642, 114, 134,
        724, 360, 386, 862, 657, 106, 4, 643, 2, 290, 152, 157, 14, 580, 315,
        440, 573, 390, 158, 892, 366, 316, 954, 280, 656, 630, 138, 734, 382,
        796, 628, 304, 342, 756, 889, 398, 238, 248, 900, 392, 656, 405, 647
    ])

    selection = zip(
        catalogue_plates[:num_quasars],
        catalogue_mjds[:num_quasars],
        catalogue_fibers[:num_quasars],
    )

    for plate, mjd, fiber_id in selection:
        # The fiber id is left-padded with zeros to four characters.
        padded_fiber = str(fiber_id).zfill(4)
        spec_name = "spec-{}-{}-{}.fits".format(plate, mjd, padded_fiber)

        print(spec_name)

        # Nothing to do when the spectrum is already on disk.
        if os.path.exists(spec_name):
            continue

        retrieve_raw_spec(plate, mjd, fiber_id)
Пример #4
0
def test_prior():
    """
    Compare the DLA-model log-priors of one spectrum with reference values.

    The spectrum is fetched with ``retrieve_raw_spec`` when missing, loaded
    into a ``DLAGPMAT`` model, and the log-priors requested with
    ``max_dlas=4`` must match the hard-coded reference array to within 1e-4.
    """
    spec_path = "spec-5309-55929-0362.fits"
    if not os.path.exists(spec_path):
        retrieve_raw_spec(5309, 55929, 362)

    z_qso = 3.166
    param = Parameters()

    # The input files below have to be prepared by running the MATLAB scripts
    # up to build_catalog.m.
    catalog_file = "data/dr12q/processed/catalog.mat"
    los_catalog = "data/dla_catalogs/dr9q_concordance/processed/los_catalog"
    dla_catalog = "data/dla_catalogs/dr9q_concordance/processed/dla_catalog"
    prior = PriorCatalog(param, catalog_file, los_catalog, dla_catalog)

    samples_file = "data/dr12q/processed/dla_samples_a03.mat"
    dla_samples = DLASamplesMAT(param, prior, samples_file)

    observed, flux, noise_variance, pixel_mask = read_spec(spec_path)
    rest_frame = param.emitted_wavelengths(observed, z_qso)

    # Build the DLA GP model and hand it the spectrum.
    learned_file = "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat"
    model = DLAGPMAT(param, prior, dla_samples, 3000.0, learned_file, True)
    model.set_data(
        rest_frame,
        flux,
        noise_variance,
        pixel_mask,
        z_qso,
        build_model=True,
    )

    computed = model.log_priors(z_qso, max_dlas=4)
    expected = np.array(
        [-2.53774598, -4.97413739, -7.40285925, -9.74851888])

    deviation = np.abs(computed - expected)
    assert np.all(deviation < 1e-4)
Пример #5
0
def prepare_subdla_model(plate: int = 5309,
                         mjd: int = 55929,
                         fiber_id: int = 362,
                         z_qso: float = 3.166) -> SubDLAGPMAT:
    """
    Build a ``SubDLAGPMAT`` model loaded with one SDSS DR12 spectrum.

    The spectrum file name is derived from ``plate``, ``mjd`` and the
    zero-padded ``fiber_id``; the file is fetched with ``retrieve_raw_spec``
    when it is not on disk.

    :param plate: plate number of the spectrum.
    :param mjd: MJD of the spectrum.
    :param fiber_id: fiber id of the spectrum.
    :param z_qso: quasar redshift used to convert to emitted wavelengths and
        passed on to ``set_data``.
    :return: the model after ``set_data(..., build_model=True)`` was called.
    """
    padded_fiber = str(fiber_id).zfill(4)
    spec_path = "spec-{}-{}-{}.fits".format(plate, mjd, padded_fiber)

    if not os.path.exists(spec_path):
        retrieve_raw_spec(plate, mjd, fiber_id)

    param = Parameters()

    # The input files below have to be prepared by running the MATLAB scripts
    # up to build_catalog.m.
    catalog_file = "data/dr12q/processed/catalog.mat"
    los_catalog = "data/dla_catalogs/dr9q_concordance/processed/los_catalog"
    dla_catalog = "data/dla_catalogs/dr9q_concordance/processed/dla_catalog"
    prior = PriorCatalog(param, catalog_file, los_catalog, dla_catalog)

    samples_file = "data/dr12q/processed/subdla_samples.mat"
    subdla_samples = SubDLASamplesMAT(param, prior, samples_file)

    observed, flux, noise_variance, pixel_mask = read_spec(spec_path)
    rest_frame = param.emitted_wavelengths(observed, z_qso)

    # Build the sub-DLA GP model and hand it the spectrum.
    learned_file = "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat"
    model = SubDLAGPMAT(param, prior, subdla_samples, 3000.0, learned_file,
                        True)
    model.set_data(
        rest_frame,
        flux,
        noise_variance,
        pixel_mask,
        z_qso,
        build_model=True,
    )

    return model
Пример #6
0
def _load_spectrum_into_null_gp(gp, param, filename, plate, mjd, fiber_id,
                                z_qso):
    """
    Fetch (if missing) and read one spectrum, then pass it to ``gp.set_data``.
    """
    if not os.path.exists(filename):
        retrieve_raw_spec(plate, mjd, fiber_id)

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)
    rest_wavelengths = param.emitted_wavelengths(wavelengths, z_qso)

    gp.set_data(rest_wavelengths,
                flux,
                noise_variance,
                pixel_mask,
                z_qso,
                build_model=True)


def _save_null_gp_plot(gp, figure_name):
    """
    Save a PDF showing the observed flux and the two null-GP curves of ``gp``.
    """
    plt.figure(figsize=(16, 5))
    plt.plot(gp.x, gp.y, label="observed flux")
    plt.plot(gp.rest_wavelengths, gp.mu, label="null GP")
    plt.plot(gp.x, gp.this_mu, label="interpolated null GP")
    plt.xlabel("rest wavelengths")
    plt.ylabel("normalised flux")
    plt.legend()
    plt.savefig(figure_name, format="pdf", dpi=300)
    plt.clf()
    plt.close()


def test_log_likelihood_no_dla():
    """
    Check the null-model log evidence of two spectra against reference values.

    One ``NullGPMAT`` instance is reused for both spectra.  For each spectrum
    the log evidence must lie within 1 of the hard-coded reference value, and
    a diagnostic plot is written to ``test1.pdf`` / ``test2.pdf``.
    """
    param = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior = PriorCatalog(
        param,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )

    gp = NullGPMAT(
        param,
        prior,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
    )

    # (file name, plate, mjd, fiber id, z_qso, reference log evidence, figure)
    cases = [
        ("spec-5309-55929-0362.fits", 5309, 55929, 362, 3.166,
         -889.04809017, "test1.pdf"),
        ("spec-3816-55272-0076.fits", 3816, 55272, 76, 3.68457627,
         -734.3727266, "test2.pdf"),
    ]

    for filename, plate, mjd, fiber_id, z_qso, expected, figure_name in cases:
        _load_spectrum_into_null_gp(gp, param, filename, plate, mjd, fiber_id,
                                    z_qso)

        log_likelihood_no_dla = gp.log_model_evidence()
        print("log p(  D  | z_QSO, no DLA ) : {:.5g}".format(
            log_likelihood_no_dla))

        # The tolerance is loose because there is some numerical difference
        # from the reference values.
        assert np.abs(log_likelihood_no_dla - expected) < 1

        _save_null_gp_plot(gp, figure_name)
Пример #7
0
def test_dla_model_evidences(broadening: bool = True):
    """
    Compare the DLA model evidences of one spectrum with catalogue values.

    The spectrum is fetched with ``retrieve_raw_spec`` when missing, loaded
    into a ``DLAGPMAT`` model, and ``log_model_evidences(4)`` must agree with
    the hard-coded catalogue values to within 2.5.

    :param broadening: passed unchanged as the last positional argument of
        ``DLAGPMAT``.
    """
    # test 1
    filename = "spec-5309-55929-0362.fits"

    if not os.path.exists(filename):
        retrieve_raw_spec(5309, 55929, 362)  # the spectrum at paper

    z_qso = 3.166

    param = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior = PriorCatalog(
        param,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )
    dla_samples = DLASamplesMAT(param, prior,
                                "data/dr12q/processed/dla_samples_a03.mat")

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)
    rest_wavelengths = param.emitted_wavelengths(wavelengths, z_qso)

    # DLA GP Model
    dla_gp = DLAGPMAT(
        param,
        prior,
        dla_samples,
        3000.0,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
        broadening,
    )
    dla_gp.set_data(rest_wavelengths,
                    flux,
                    noise_variance,
                    pixel_mask,
                    z_qso,
                    build_model=True)

    tic = time.time()

    max_dlas = 4
    log_likelihoods_dla = dla_gp.log_model_evidences(max_dlas)

    toc = time.time()
    # Original author's note: very time consuming, ~4 mins for a single
    # spectrum when not parallelized.
    minutes, seconds = divmod(toc - tic, 60)
    print("spent {} mins; {} seconds".format(minutes, seconds))

    # log likelihood results from the catalog
    catalog_log_likelihoods_dla = np.array(
        [-688.91647288, -633.00070813, -634.08569242, -640.77120558])

    # Walk both arrays in lockstep instead of indexing them by position;
    # the printed DLA counter starts at 1.
    for num_dlas, (computed, reference) in enumerate(
            zip(log_likelihoods_dla, catalog_log_likelihoods_dla), start=1):
        print("log p(  D  | z_QSO, DLA{} ) : {:.5g}; MATLAB value: {:.5g}".
              format(num_dlas, computed, reference))

    # the accuracy down to 2.5 in log scale, this needs to be investigated.
    assert np.all(
        np.abs(catalog_log_likelihoods_dla - log_likelihoods_dla) < 2.5)
Пример #8
0
def test_dla_model(broadening: bool = True):
    """
    Check that a two-absorber sample beats the null model on one spectrum.

    The sample log-likelihood returned by ``sample_log_likelihood_k_dlas`` for
    the hard-coded ``z_dlas`` / ``nhis`` values must be larger than the log
    evidence of a ``NullGPMAT`` model fitted to the same data.

    :param broadening: passed unchanged as the last positional argument of
        ``DLAGPMAT``.
    """
    spec_path = "spec-5309-55929-0362.fits"
    if not os.path.exists(spec_path):
        retrieve_raw_spec(5309, 55929, 362)

    z_qso = 3.166
    param = Parameters()

    # The input files below have to be prepared by running the MATLAB scripts
    # up to build_catalog.m.
    catalog_file = "data/dr12q/processed/catalog.mat"
    los_catalog = "data/dla_catalogs/dr9q_concordance/processed/los_catalog"
    dla_catalog = "data/dla_catalogs/dr9q_concordance/processed/dla_catalog"
    prior = PriorCatalog(param, catalog_file, los_catalog, dla_catalog)

    samples_file = "data/dr12q/processed/dla_samples_a03.mat"
    dla_samples = DLASamplesMAT(param, prior, samples_file)

    observed, flux, noise_variance, pixel_mask = read_spec(spec_path)
    rest_frame = param.emitted_wavelengths(observed, z_qso)

    # The same learned-model file feeds both the DLA model and the null model.
    learned_file = "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat"

    # DLA GP model loaded with the spectrum.
    dla_model = DLAGPMAT(param, prior, dla_samples, 3000.0, learned_file,
                         broadening)
    dla_model.set_data(
        rest_frame,
        flux,
        noise_variance,
        pixel_mask,
        z_qso,
        build_model=True,
    )

    # According to the original comment these are the MAPs from the paper;
    # the second array holds exponents of 10.
    z_dlas = np.array([2.52182382, 3.03175723])
    log_nhis = np.array([20.63417494, 22.28420156])
    nhis = 10**log_nhis

    dla_log_likelihood = dla_model.sample_log_likelihood_k_dlas(z_dlas, nhis)
    print("log p(  D  | z_QSO, zdlas, nhis ) : {:.5g}".format(
        dla_log_likelihood))

    # Null model fitted to exactly the same data, for comparison.
    null_model = NullGPMAT(param, prior, learned_file)
    null_model.set_data(
        rest_frame,
        flux,
        noise_variance,
        pixel_mask,
        z_qso,
        build_model=True,
    )

    null_log_evidence = null_model.log_model_evidence()
    print("log p(  D  | z_QSO, no DLA ) : {:.5g}".format(null_log_evidence))

    assert dla_log_likelihood > null_log_evidence