Пример #1
0
def test_zestimation(nspec: int):
    """
    Run the z_QSO inference on one spectrum and report how long it took.

    The spectrum is selected by index from the module-level ``filenames``
    list. When the file is not on disk, plate / MJD / fiber id are parsed
    from the file name and handed to ``retrieve_raw_spec`` first.

    :param nspec: index into the module-level ``filenames`` and ``z_qsos``.
    :return: the tuple ``(z_qso_gp.z_map, z_qsos[nspec])``.
    :raises ValueError: if the file is missing and its name does not follow
        the ``spec-<plate>-<mjd>-<fiber>.fits`` pattern.
    """
    filename = filenames[nspec]

    if not os.path.exists(filename):
        # The dot is escaped so that only a literal ".fits" suffix matches.
        match = re.search(r"spec-([0-9]+)-([0-9]+)-([0-9]+)\.fits", filename)
        if match is None:
            raise ValueError(
                "cannot parse plate/mjd/fiber_id from {!r}".format(filename))
        plate, mjd, fiber_id = match.groups()
        retrieve_raw_spec(int(plate), int(mjd), int(fiber_id))

    params = ZParameters()
    z_qso_samples = ZSamples(params)

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)

    z_qso_gp = ZGPMAT(
        params,
        z_qso_samples,
        learned_file=
        "data/dr12q/processed/learned_zqso_only_model_outdata_normout_dr9q_minus_concordance_norm_1176-1256.mat",
    )

    tic = time.time()
    z_qso_gp.inference_z_qso(wavelengths, flux, noise_variance, pixel_mask)
    # Stop the clock before any printing so only the inference is timed.
    toc = time.time()

    print("Z True : {:.3g}".format(z_qsos[nspec]))

    elapsed = toc - tic
    print("spent {} mins; {} seconds".format(elapsed // 60, elapsed % 60))

    return z_qso_gp.z_map, z_qsos[nspec]
Пример #2
0
def test_read_spec():
    """
    Sanity-check the four arrays that ``read_spec`` returns for one spectrum.

    The spectrum file is requested through ``retrieve_raw_spec`` when it is
    not already present in the working directory.
    """
    spec_file = "spec-7340-56825-0576.fits"  # an arbitrary spectrum

    already_on_disk = os.path.exists(spec_file)
    if not already_on_disk:
        retrieve_raw_spec(7340, 56825, 576)

    spectrum = read_spec(spec_file)
    wavelengths, flux, noise_variance, pixel_mask = spectrum

    # every wavelength in the file must lie above 1216
    shortest_wavelength = min(wavelengths)
    assert shortest_wavelength > 1216

    # flux and its noise variance must have one entry per pixel each
    n_flux = len(flux)
    n_noise = len(noise_variance)
    assert n_flux == n_noise

    # the mask must hold numpy booleans
    first_flag = pixel_mask[0]
    assert type(first_flag) is np.bool_
Пример #3
0
def test_prior():
    """
    Check ``DLAGPMAT.log_priors`` against reference values for one spectrum.

    The log priors for up to four DLAs are computed for the spectrum
    ``spec-5309-55929-0362.fits`` at z_QSO = 3.166 and must agree with the
    stored catalog values to within 1e-4.
    """
    spec_file = "spec-5309-55929-0362.fits"
    quasar_redshift = 3.166

    # the spectrum at paper; only requested when it is not on disk yet
    if not os.path.exists(spec_file):
        retrieve_raw_spec(5309, 55929, 362)

    params = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior_catalog = PriorCatalog(
        params,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )
    samples = DLASamplesMAT(
        params,
        prior_catalog,
        "data/dr12q/processed/dla_samples_a03.mat",
    )

    observed, flux, noise_variance, pixel_mask = read_spec(spec_file)
    rest_frame = params.emitted_wavelengths(observed, quasar_redshift)

    # DLA GP model
    model = DLAGPMAT(
        params,
        prior_catalog,
        samples,
        3000.0,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
        True,
    )
    model.set_data(
        rest_frame,
        flux,
        noise_variance,
        pixel_mask,
        quasar_redshift,
        build_model=True,
    )

    log_priors = model.log_priors(quasar_redshift, max_dlas=4)

    # reference log priors, one per number of DLAs
    expected = np.array(
        [-2.53774598, -4.97413739, -7.40285925, -9.74851888])

    deviations = np.abs(log_priors - expected)
    assert np.all(deviations < 1e-4)
Пример #4
0
def prepare_subdla_model(plate: int = 5309,
                         mjd: int = 55929,
                         fiber_id: int = 362,
                         z_qso: float = 3.166) -> SubDLAGPMAT:
    """
    Build a SubDLAGPMAT instance with the data of one spectrum already set.

    The spectrum file name is derived from ``plate``, ``mjd`` and the
    ``fiber_id`` zero-padded to four characters; the file is requested
    through ``retrieve_raw_spec`` when it is not on disk.

    :param plate: plate number used in the spectrum file name.
    :param mjd: MJD used in the spectrum file name.
    :param fiber_id: fiber id used in the spectrum file name.
    :param z_qso: quasar redshift used to shift the wavelengths and passed
        on to ``set_data``.
    :return: the SubDLAGPMAT object after ``set_data(..., build_model=True)``.
    """
    padded_fiber = str(fiber_id).zfill(4)
    spec_file = "spec-{}-{}-{}.fits".format(plate, mjd, padded_fiber)

    # fetch the requested spectrum only when it is missing locally
    if not os.path.exists(spec_file):
        retrieve_raw_spec(plate, mjd, fiber_id)

    params = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior_catalog = PriorCatalog(
        params,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )
    samples = SubDLASamplesMAT(
        params,
        prior_catalog,
        "data/dr12q/processed/subdla_samples.mat",
    )

    observed, flux, noise_variance, pixel_mask = read_spec(spec_file)
    rest_frame = params.emitted_wavelengths(observed, z_qso)

    # sub-DLA GP model
    model = SubDLAGPMAT(
        params,
        prior_catalog,
        samples,
        3000.0,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
        True,
    )
    model.set_data(
        rest_frame,
        flux,
        noise_variance,
        pixel_mask,
        z_qso,
        build_model=True,
    )

    return model
Пример #5
0
def _check_null_model_evidence(gp, param, filename, spec_id, z_qso,
                               expected_log_evidence, figure_path):
    """
    Load one spectrum into the null GP, check its evidence and plot the fit.

    :param gp: the null-model GP object; its data are replaced via ``set_data``.
    :param param: object providing ``emitted_wavelengths``.
    :param filename: spectrum file to read.
    :param spec_id: ``(plate, mjd, fiber_id)`` passed to ``retrieve_raw_spec``
        when ``filename`` is not on disk.
    :param z_qso: quasar redshift of the spectrum.
    :param expected_log_evidence: reference value for ``log_model_evidence``.
    :param figure_path: where the diagnostic PDF figure is written.
    :raises AssertionError: if the evidence differs from the reference by 1
        or more.
    """
    if not os.path.exists(filename):
        retrieve_raw_spec(*spec_id)  # the spectrum at paper

    wavelengths, flux, noise_variance, pixel_mask = read_spec(filename)
    rest_wavelengths = param.emitted_wavelengths(wavelengths, z_qso)

    gp.set_data(rest_wavelengths,
                flux,
                noise_variance,
                pixel_mask,
                z_qso,
                build_model=True)

    log_likelihood_no_dla = gp.log_model_evidence()
    print(
        "log p(  D  | z_QSO, no DLA ) : {:.5g}".format(log_likelihood_no_dla))

    # there is some numerical difference, hence the loose tolerance
    assert np.abs(log_likelihood_no_dla - expected_log_evidence) < 1

    plt.figure(figsize=(16, 5))
    try:
        plt.plot(gp.x, gp.y, label="observed flux")
        plt.plot(gp.rest_wavelengths, gp.mu, label="null GP")
        plt.plot(gp.x, gp.this_mu, label="interpolated null GP")
        plt.xlabel("rest wavelengths")
        plt.ylabel("normalised flux")
        plt.legend()
        plt.savefig(figure_path, format="pdf", dpi=300)
    finally:
        # always release the figure, even when plotting or saving fails
        plt.clf()
        plt.close()


def test_log_likelihood_no_dla():
    """
    Check the null-model (no DLA) log evidence on two reference spectra.

    One ``NullGPMAT`` object is built once and reused for both spectra; each
    spectrum's evidence is compared with its stored reference value and a
    diagnostic figure (``test1.pdf`` / ``test2.pdf``) is written.
    """
    param = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior = PriorCatalog(
        param,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )

    gp = NullGPMAT(
        param,
        prior,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
    )

    # test 1
    _check_null_model_evidence(
        gp,
        param,
        "spec-5309-55929-0362.fits",
        (5309, 55929, 362),
        3.166,
        -889.04809017,
        "test1.pdf",
    )

    # test 2
    _check_null_model_evidence(
        gp,
        param,
        "spec-3816-55272-0076.fits",
        (3816, 55272, 76),
        3.68457627,
        -734.3727266,
        "test2.pdf",
    )
Пример #6
0
def test_dla_model_evidences(broadening: bool = True):
    """
    Compare the DLA model evidences with the values stored from the catalog.

    The evidences for one to four DLAs are computed for the spectrum
    ``spec-5309-55929-0362.fits`` at z_QSO = 3.166, printed next to the
    reference numbers, and required to agree to within 2.5 in log scale.

    :param broadening: forwarded as the last positional argument of
        ``DLAGPMAT``.
    """
    spec_file = "spec-5309-55929-0362.fits"
    quasar_redshift = 3.166
    max_dlas = 4

    # the spectrum at paper; only requested when it is not on disk yet
    if not os.path.exists(spec_file):
        retrieve_raw_spec(5309, 55929, 362)

    params = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior_catalog = PriorCatalog(
        params,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )
    samples = DLASamplesMAT(
        params,
        prior_catalog,
        "data/dr12q/processed/dla_samples_a03.mat",
    )

    observed, flux, noise_variance, pixel_mask = read_spec(spec_file)
    rest_frame = params.emitted_wavelengths(observed, quasar_redshift)

    # DLA GP model
    model = DLAGPMAT(
        params,
        prior_catalog,
        samples,
        3000.0,
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat",
        broadening,
    )
    model.set_data(
        rest_frame,
        flux,
        noise_variance,
        pixel_mask,
        quasar_redshift,
        build_model=True,
    )

    # very time consuming: ~4 mins for a single spectrum when not parallelised
    started = time.time()
    log_likelihoods_dla = model.log_model_evidences(max_dlas)
    finished = time.time()

    elapsed = finished - started
    print("spent {} mins; {} seconds".format(elapsed // 60, elapsed % 60))

    # log likelihood results from the catalog
    catalog_log_likelihoods_dla = np.array(
        [-688.91647288, -633.00070813, -634.08569242, -640.77120558])

    for n_dla, matlab_value in enumerate(catalog_log_likelihoods_dla,
                                         start=1):
        computed_value = log_likelihoods_dla[n_dla - 1]
        print("log p(  D  | z_QSO, DLA{} ) : {:.5g}; MATLAB value: {:.5g}".
              format(n_dla, computed_value, matlab_value))

    # the accuracy is only down to 2.5 in log scale; this needs to be
    # investigated.
    deviations = np.abs(catalog_log_likelihoods_dla - log_likelihoods_dla)
    assert np.all(deviations < 2.5)
Пример #7
0
def test_dla_model(broadening: bool = True):
    """
    Check that a two-DLA sample beats the null model on one spectrum.

    The DLA GP likelihood evaluated at two fixed (z_DLA, N_HI) pairs must be
    larger than the evidence of the null (no DLA) GP for the spectrum
    ``spec-5309-55929-0362.fits`` at z_QSO = 3.166.

    :param broadening: forwarded as the last positional argument of
        ``DLAGPMAT``.
    """
    spec_file = "spec-5309-55929-0362.fits"
    quasar_redshift = 3.166
    learned_model = (
        "data/dr12q/processed/learned_qso_model_lyseries_variance_kim_dr9q_minus_concordance.mat"
    )

    # the spectrum at paper; only requested when it is not on disk yet
    if not os.path.exists(spec_file):
        retrieve_raw_spec(5309, 55929, 362)

    params = Parameters()

    # prepare these files by running the MATLAB scripts until build_catalog.m
    prior_catalog = PriorCatalog(
        params,
        "data/dr12q/processed/catalog.mat",
        "data/dla_catalogs/dr9q_concordance/processed/los_catalog",
        "data/dla_catalogs/dr9q_concordance/processed/dla_catalog",
    )
    samples = DLASamplesMAT(
        params,
        prior_catalog,
        "data/dr12q/processed/dla_samples_a03.mat",
    )

    observed, flux, noise_variance, pixel_mask = read_spec(spec_file)
    rest_frame = params.emitted_wavelengths(observed, quasar_redshift)

    # DLA GP model
    dla_model = DLAGPMAT(
        params,
        prior_catalog,
        samples,
        3000.0,
        learned_model,
        broadening,
    )
    dla_model.set_data(
        rest_frame,
        flux,
        noise_variance,
        pixel_mask,
        quasar_redshift,
        build_model=True,
    )

    # These are the MAPs from the paper
    z_dlas = np.array([2.52182382, 3.03175723])
    log_nhis = np.array([20.63417494, 22.28420156])
    nhis = 10**log_nhis

    dla_log_likelihood = dla_model.sample_log_likelihood_k_dlas(z_dlas, nhis)
    print("log p(  D  | z_QSO, zdlas, nhis ) : {:.5g}".format(
        dla_log_likelihood))

    # Null model on the same data, for comparison
    null_model = NullGPMAT(
        params,
        prior_catalog,
        learned_model,
    )
    null_model.set_data(
        rest_frame,
        flux,
        noise_variance,
        pixel_mask,
        quasar_redshift,
        build_model=True,
    )

    null_log_evidence = null_model.log_model_evidence()
    print(
        "log p(  D  | z_QSO, no DLA ) : {:.5g}".format(null_log_evidence))

    assert dla_log_likelihood > null_log_evidence