Пример #1
0
def ism_transmittance_chunk(qso_record_table):
    """Compute ISM delta-transmittance spectra for one chunk of QSO records.

    For each record, the redshift grid and inverse variance of the existing
    delta-transmittance spectrum are read, the grid is converted to
    wavelength, and the extinction spectrum of the matching extinction bin
    is resampled onto it. The resampled spectrum minus 1 is then rescaled by
    the ratio between the QSO's ``extinction_g`` and the bin's extinction
    level.

    Relies on the module-level names ``ar_extinction_levels``,
    ``extinction_spectra_list`` and ``lya_center``.

    Args:
        qso_record_table: Indexable table of QSO rows accepted by
            ``QSORecord.from_row``; row 0 must provide an ``'index'`` field.

    Returns:
        A 2-tuple: the value of ``as_np_array()`` on the result container,
        and ``None``.
    """
    start_offset = qso_record_table[0]['index']
    delta_transmittance_file = NpSpectrumContainer(
        readonly=True,
        filename=settings.get_delta_t_npy(),
        max_wavelength_count=1000)

    num_spectra = len(qso_record_table)
    ism_delta_t = NpSpectrumContainer(False, num_spectra=num_spectra)
    # warning: np.ndarray is not initialized by default. zeroing manually.
    ism_delta_t.zero()

    # limit maximum bin number because higher extinction bins are not reliable.
    # np.interp needs xp and fp of equal length, so the extinction levels are
    # truncated to the same number of bins. Values beyond the last usable
    # level are clamped by np.interp to the last usable bin.
    max_extinction_bin = min(20, ar_extinction_levels.size)
    ar_usable_levels = ar_extinction_levels[:max_extinction_bin]
    ar_bin_numbers = np.arange(max_extinction_bin)

    ism_scale_factor = 1.

    for i in range(num_spectra):
        qso_rec = QSORecord.from_row(qso_record_table[i])
        index = qso_rec.index

        # read the grid and weights of the original delta transmittance.
        # the "wavelength" field of that file is used here as redshift.
        ar_redshift = delta_transmittance_file.get_wavelength(index)
        ar_ivar = delta_transmittance_file.get_ivar(index)

        ar_wavelength = (ar_redshift + 1) * lya_center  # type: np.ndarray

        if np.isfinite(qso_rec.extinction_g):
            # nearest bin number for this QSO's extinction
            extinction_bin = int(
                np.round(
                    np.interp(qso_rec.extinction_g, ar_usable_levels,
                              ar_bin_numbers)))
        else:
            extinction_bin = 0

        l_print_no_barrier("extinction_bin = ", extinction_bin)
        # wavelengths outside the extinction spectrum become NaN and are
        # dropped by the mask below.
        ar_ism_resampled = np.interp(
            ar_wavelength,
            extinction_spectra_list[extinction_bin][0],
            extinction_spectra_list[extinction_bin][1],
            left=np.nan,
            right=np.nan)
        extinction = ar_extinction_levels[extinction_bin]
        # rescale according to QSO extinction
        l_print_no_barrier(qso_rec.extinction_g, extinction)
        # subtract 1 to get the delta, then scale it to this QSO's extinction
        ar_flux_new = (ar_ism_resampled - 1
                       ) * ism_scale_factor * qso_rec.extinction_g / extinction

        # keep pixels with a finite ISM value and a non-zero inverse variance
        mask = np.logical_and(np.isfinite(ar_flux_new), ar_ivar)

        ism_delta_t.set_wavelength(i, ar_redshift[mask])
        ism_delta_t.set_flux(i, ar_flux_new[mask])
        # use original ivar because we are not correcting an existing spectrum
        ism_delta_t.set_ivar(i, ar_ivar[mask])

    # every row is processed, so the chunk count equals the table length
    l_print_no_barrier("chunk n =", num_spectra, "offset =", start_offset)
    return ism_delta_t.as_np_array(), None
Пример #2
0
def delta_transmittance_chunk(qso_record_table):
    """Compute delta-transmittance spectra for one chunk of QSO records.

    For each QSO with a good continuum fit, the forest transmittance is
    obtained from ``qso_transmittance``, divided by the mean transmittance
    interpolated at the same redshifts, and reduced by 1. Inverse variances
    come from ``pixel_weight.eval``; pixels flagged by the simple DLA remover
    get zero inverse variance; non-finite pixels are dropped. Forests that
    span a large enough comoving range are detrended with ``nu_boxcar``.

    Side effects:
        Increments ``local_delta_stats['bad_fit']`` for every skipped QSO,
        passes ``local_delta_stats`` to ``qso_transmittance``, and increments
        ``delta_transmittance_chunk.num_spec`` once for every QSO that was
        not skipped.
        NOTE(review): ``num_spec`` is assumed to be initialised on the
        function object elsewhere in the file - confirm.

    Args:
        qso_record_table: Indexable table of QSO rows; row 0 must provide an
            ``'index'`` field.

    Returns:
        A 2-tuple: the value of ``as_np_array()`` on the result container,
        and ``None``. Rows of skipped or empty records stay zeroed.
    """
    start_offset = qso_record_table[0]['index']
    spectra = read_spectrum_hdf5.SpectraWithMetadata(
        qso_record_table, settings.get_qso_spectra_hdf5())
    continuum_fit_file = ContinuumFitContainerFiles(False)

    num_spectra = len(qso_record_table)
    delta_t = NpSpectrumContainer(False, num_spectra=num_spectra)
    # warning: np.ndarray is not initialized by default. zeroing manually.
    delta_t.zero()
    m = mean_transmittance.MeanTransmittance.from_file(
        settings.get_mean_transmittance_npy())
    # ignore values with less than 20 sample points
    ar_z_mean_transmittance, ar_mean_transmittance = m.get_weighted_mean_with_minimum_count(
        20)
    remove_dla = RemoveDlaSimple()

    pixel_weight = pixel_weight_coefficients.PixelWeight(
        pixel_weight_coefficients.DEFAULT_WEIGHT_Z_RANGE)
    for n in range(num_spectra):
        qso_spec_obj = spectra.return_spectrum(n)
        index = qso_spec_obj.qso_rec.index

        if not continuum_fit_file.get_is_good_fit(index):
            local_delta_stats['bad_fit'] += 1
            l_print_no_barrier("skipped QSO (bad fit): ", qso_spec_obj.qso_rec)
            continue

        ar_fit_spectrum = continuum_fit_file.get_flux(index)
        # we assume the fit spectrum uses the same wavelengths.

        lya_forest_transmittance = qso_transmittance(
            qso_spec_obj,
            ar_fit_spectrum,
            local_delta_stats,
            downsample_factor=settings.get_forest_downsample_factor())
        ar_z = lya_forest_transmittance.ar_z
        if ar_z.size:
            # prepare the mean transmittance for the z range of this QSO
            ar_mean_flux_for_z_range = np.asarray(
                np.interp(ar_z, ar_z_mean_transmittance,
                          ar_mean_transmittance))

            # delta transmittance is the change in relative transmittance vs the mean
            # therefore, subtract 1.
            ar_delta_t = lya_forest_transmittance.ar_transmittance / ar_mean_flux_for_z_range - 1

            # finish the error estimation, and save it
            ar_delta_t_ivar = pixel_weight.eval(
                lya_forest_transmittance.ar_ivar,
                ar_mean_flux_for_z_range * lya_forest_transmittance.ar_fit,
                ar_z)

            # simple DLA removal (without using a catalog)
            if settings.get_enable_simple_dla_removal():
                # remove DLA regions by setting the ivar of nearby pixels to 0
                ar_dla_mask = remove_dla.get_mask(ar_delta_t)
                if np.any(ar_dla_mask):
                    l_print_no_barrier("DLA(s) removed from QSO: ",
                                       qso_spec_obj.qso_rec)
                ar_delta_t_ivar[ar_dla_mask] = 0

            # ignore nan or infinite values (in case m_mean has incomplete data because of a low sample size)
            # Note: using wavelength field to store redshift
            finite_mask = np.logical_and(np.isfinite(ar_delta_t),
                                         np.isfinite(ar_delta_t_ivar))
            finite_z = ar_z[finite_mask]
            finite_delta_t = ar_delta_t[finite_mask]
            finite_ivar = ar_delta_t_ivar[finite_mask]

            # detrend forests with large enough range in comoving coordinates.
            # the mask may have removed every pixel; indexing [-1] / [0] on an
            # empty array would raise IndexError, so skip detrending then.
            if finite_z.size:
                finite_distances = cd.fast_comoving_distance(finite_z)
                if finite_distances[-1] - finite_distances[0] > 500:
                    delta_t_boxcar = nu_boxcar(finite_distances,
                                               finite_delta_t,
                                               lambda c: c - 300,
                                               lambda c: c + 300,
                                               weights=finite_ivar)
                    finite_delta_t = finite_delta_t - delta_t_boxcar

            delta_t.set_wavelength(n, finite_z)
            delta_t.set_flux(n, finite_delta_t)
            delta_t.set_ivar(n, finite_ivar)
        # else: empty record, the zeroed row is left as is.

        # count every QSO that was not skipped (including empty records), so
        # that the "num spectra" log line reflects the spectra handled.
        delta_transmittance_chunk.num_spec += 1

    l_print_no_barrier("finished chunk, num spectra:",
                       delta_transmittance_chunk.num_spec, " offset: ",
                       start_offset)
    return delta_t.as_np_array(), None
Пример #3
0
class ContinuumFitContainer(object):
    """Continuum-fit spectra together with a per-spectrum metadata table.

    Spectrum data lives in an ``NpSpectrumContainer`` (``self.np_spectrum``);
    metadata lives in a table (``self.continuum_fit_metadata``) with the
    columns ``index``, ``is_good_fit``, ``goodness_of_fit`` and ``snr``.
    """

    def __init__(self, num_spectra=-1):
        """Allocate a writable, zeroed container for ``num_spectra`` spectra."""
        self.num_spectra = num_spectra
        self.np_spectrum = NpSpectrumContainer(readonly=False, num_spectra=num_spectra)
        self.continuum_fit_metadata = table.Table()
        self.continuum_fit_metadata.add_columns(
            [table.Column(name='index', dtype='i8', unit=None, length=num_spectra),
             table.Column(name='is_good_fit', dtype='b', unit=None, length=num_spectra),
             table.Column(name='goodness_of_fit', dtype='f8', unit=None, length=num_spectra),
             table.Column(name='snr', dtype='f8', unit=None, length=num_spectra)])

        # initialize array
        self.np_spectrum.zero()

    def get_wavelength(self, n):
        """Return the wavelength data of spectrum ``n``."""
        return self.np_spectrum.get_wavelength(n)

    def get_flux(self, n):
        """Return the flux data of spectrum ``n``."""
        return self.np_spectrum.get_flux(n)

    def set_wavelength(self, n, data):
        """Store ``data`` as the wavelength data of spectrum ``n``."""
        self.np_spectrum.set_wavelength(n, data)

    def set_flux(self, n, data):
        """Store ``data`` as the flux data of spectrum ``n``."""
        self.np_spectrum.set_flux(n, data)

    def set_metadata(self, n, is_good_fit, goodness_of_fit, snr):
        """Write metadata row ``n``; ``n`` itself is stored as the row's index value."""
        self.continuum_fit_metadata[n] = [n, is_good_fit, goodness_of_fit, snr]

    def copy_metadata(self, n, metadata):
        """Overwrite metadata row ``n`` with an existing ``metadata`` row."""
        self.continuum_fit_metadata[n] = metadata

    def get_metadata(self, n):
        """Return metadata row ``n``."""
        return self.continuum_fit_metadata[n]

    def get_is_good_fit(self, n):
        """Return the ``is_good_fit`` field of metadata row ``n``."""
        return self.get_metadata(n)['is_good_fit']

    def get_goodness_of_fit(self, n):
        """Return the ``goodness_of_fit`` field of metadata row ``n``."""
        return self.get_metadata(n)['goodness_of_fit']

    def get_snr(self, n):
        """Return the ``snr`` field of metadata row ``n``."""
        return self.get_metadata(n)['snr']

    @classmethod
    def from_np_array_and_object(cls, np_array, obj):
        """Build an instance around an existing spectrum array and metadata object.

        Args:
            np_array: Array handed to ``NpSpectrumContainer.from_np_array``
                (opened read-only).
            obj: Metadata object, as previously returned by ``as_object()``.

        Returns:
            A new instance whose spectrum container and metadata are the
            given data rather than freshly allocated ones.

        Raises:
            TypeError: If ``obj`` is not an instance of the metadata table
                type that a new container creates.
        """
        # TODO: consider refactoring.
        np_spectrum = NpSpectrumContainer.from_np_array(np_array, readonly=True)
        new_instance = cls(num_spectra=np_spectrum.num_spectra)

        # validate explicitly: an assert would be stripped under `python -O`
        expected_type = type(new_instance.continuum_fit_metadata)
        if not isinstance(obj, expected_type):
            raise TypeError(
                "metadata object must be an instance of {}, got {}".format(
                    expected_type.__name__, type(obj).__name__))

        # replace spectrum container with existing data
        new_instance.np_spectrum = np_spectrum
        # replace metadata with existing metadata object
        new_instance.continuum_fit_metadata = obj
        return new_instance

    def as_object(self):
        """Return the metadata table object."""
        return self.continuum_fit_metadata

    def as_np_array(self):
        """Return the spectrum data via the container's ``as_np_array()``."""
        return self.np_spectrum.as_np_array()