Example No. 1
    def __parse_config(self, config):
        """Parse the configuration options

        Arguments
        ---------
        config: configparser.SectionProxy
        Parsed options to initialize class

        Raise
        -----
        DataError upon missing required variables
        """
        # instance variables
        self.blinding = config.get("blinding")
        if self.blinding is None:
            raise DataError("Missing argument 'blinding' required by DesiData")
        if self.blinding not in ACCEPTED_BLINDING_STRATEGIES:
            raise DataError(
                "Unrecognized blinding strategy. Accepted strategies "
                f"are {ACCEPTED_BLINDING_STRATEGIES}. "
                f"Found '{self.blinding}'")

        self.num_processors = config.getint("num processors")
        if self.num_processors is None:
            raise DataError(
                "Missing argument 'num processors' required by DesiData")
        if self.num_processors == 0:
            self.num_processors = (multiprocessing.cpu_count() // 2)

        self.use_non_coadded_spectra = config.getboolean(
            "use non-coadded spectra")
        if self.use_non_coadded_spectra is None:
            raise DataError(
                "Missing argument 'use non-coadded spectra' required by DesiData"
            )
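
A minimal sketch of a configuration section that would satisfy this parser (the key names come from the code above; "none" as a blinding value is an assumption, since ACCEPTED_BLINDING_STRATEGIES is defined elsewhere in picca):

    import configparser

    parser = configparser.ConfigParser()
    parser["data"] = {
        "blinding": "none",  # assumed to be in ACCEPTED_BLINDING_STRATEGIES
        "num processors": "0",  # 0 means "use half the available cores"
        "use non-coadded spectra": "False",
    }
    section = parser["data"]  # a configparser.SectionProxy

    # The same accessors used by __parse_config:
    assert section.get("blinding") == "none"
    assert section.getint("num processors") == 0
    assert section.getboolean("use non-coadded spectra") is False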
Example No. 2
    def read_file(self, filename, catalogue):
        """Read the spectra and formats its data as Forest instances.

        Arguments
        ---------
        filename: str
        Name of the file to read

        catalogue: astropy.table.Table
        The quasar catalogue fragment associated with this file

        Return
        ------
        forests_by_targetid: dict
        Dictionary where forests are stored.

        num_data: int
        The number of instances loaded

        Raise
        -----
        DataError if the analysis type is PK 1D and resolution data is not present
        """
        raise DataError(
            "Function 'read_file' was not overloaded by child class")
Example No. 3
    def __init__(self, config):
        """Initialize class instance

        Arguments
        ---------
        config: configparser.SectionProxy
        Parsed options to initialize class

        Raise
        -----
        DataError if the selected reading mode is not supported
        """
        self.logger = logging.getLogger(__name__)

        # load variables from config
        self.mode = None
        self.__parse_config(config)

        super().__init__(config)

        # load DRQ Catalogue
        catalogue = DrqCatalogue(config).catalogue

        # read data
        if self.mode == "spplate":
            self.read_from_spplate(catalogue)
        elif self.mode == "spec":
            self.read_from_spec(catalogue)
        else:
            raise DataError(
                f"Error reading data in SdssData. Mode {self.mode} "
                "is not supported.")
Example No. 4
    def __parse_config(self, config):
        """Parse the configuration options

        Arguments
        ---------
        config: configparser.SectionProxy
        Parsed options to initialize class

        Raise
        -----
        DataError upon missing required variables
        """
        # instance variables
        self.mode = config.get("mode")
        if self.mode is None:
            raise DataError("Missing argument 'mode' required by SdssData")

        rebin = config.getint("rebin")
        if rebin is None:
            raise DataError("Missing argument 'rebin' required by SdssData")
        config["delta log lambda"] = str(rebin * 1e-4)
        del config["rebin"]

        config["wave solution"] = "log"
Example No. 5
    def read_data(self):
        """Read the spectra and formats its data as Forest instances.

        Method to be implemented by child classes.

        Return
        ------
        is_mock: bool
        True if mocks are read, False otherwise

        is_sv: bool
        True if all the read data belong to SV. False otherwise

        Raise
        -----
        DataError if no quasars were found
        """
        raise DataError(
            "Function 'read_data' was not overloaded by child class")
Example No. 6
    def format_data(self,
                    catalogue,
                    spectrographs_data,
                    targetid_spec,
                    reso_from_truth=False):
        """After data has been read, format it into DesiForest instances

        Instances will be DesiForest or DesiPk1dForest depending on analysis_type

        Arguments
        ---------
        catalogue: astropy.table.Table
        The quasar catalogue fragment associated with this data

        spectrographs_data: dict
        The read data

        targetid_spec: array of int
        Targetids of the spectra to format

        reso_from_truth: bool - Default: False
        Specifies whether resolution matrices are read from truth files (True)
        or directly from data (False)

        Return
        ------
        forests_by_targetid: dict
        Dictionary where forests are stored.

        num_data: int
        The number of instances loaded
        """
        num_data = 0
        forests_by_targetid = {}

        # Loop over quasars in catalogue fragment
        for row in catalogue:
            # Find which row in tile contains this quasar
            # It should be there by construction
            targetid = row["TARGETID"]
            w_t = np.where(targetid_spec == targetid)[0]
            if len(w_t) == 0:
                self.logger.warning(
                    f"Error reading {targetid}. Ignoring object")
                continue
            if len(w_t) > 1:
                self.logger.warning(
                    "Warning: more than one spectrum in this file "
                    f"for {targetid}")
            else:
                w_t = w_t[0]
            # Construct DesiForest instance
            # Fluxes from the different spectrographs will be coadded
            for spec in spectrographs_data.values():
                if self.use_non_coadded_spectra:
                    ivar = np.atleast_2d(spec['IVAR'][w_t])
                    ivar_coadded_flux = np.atleast_2d(
                        ivar * spec['FLUX'][w_t]).sum(axis=0)
                    ivar = ivar.sum(axis=0)
                    flux = (ivar_coadded_flux / ivar)
                else:
                    flux = spec['FLUX'][w_t].copy()
                    ivar = spec['IVAR'][w_t].copy()

                args = {
                    "flux": flux,
                    "ivar": ivar,
                    "targetid": targetid,
                    "ra": row['RA'],
                    "dec": row['DEC'],
                    "z": row['Z'],
                }
                args["log_lambda"] = np.log10(spec['WAVELENGTH'])

                if self.analysis_type == "BAO 3D":
                    forest = DesiForest(**args)
                elif self.analysis_type == "PK 1D":
                    if self.use_non_coadded_spectra:
                        exposures_diff = exp_diff_desi(spec, w_t)
                        if exposures_diff is None:
                            continue
                    else:
                        exposures_diff = np.zeros(spec['WAVELENGTH'].shape)
                    if reso_from_truth:
                        reso_sum = spec['RESO'][:, :]
                    else:
                        if len(spec['RESO'][w_t].shape) < 3:
                            reso_sum = spec['RESO'][w_t].copy()
                        else:
                            reso_sum = spec['RESO'][w_t].sum(axis=0)
                    reso_in_pix, reso_in_km_per_s = spectral_resolution_desi(
                        reso_sum, spec['WAVELENGTH'])
                    args["exposures_diff"] = exposures_diff
                    args["reso"] = reso_in_km_per_s
                    args["resolution_matrix"] = reso_sum
                    args["reso_pix"] = reso_in_pix

                    forest = DesiPk1dForest(**args)
                # this should never be entered; kept here in case we add
                # another analysis type at some point
                else:  # pragma: no cover
                    raise DataError(
                        "Unknown analysis type. Expected 'BAO 3D' "
                        f"or 'PK 1D'. Found '{self.analysis_type}'")

                # rebin arrays
                # this needs to happen after all arrays are initialized by
                # Forest constructor
                forest.rebin()

                # keep the forest
                if targetid in forests_by_targetid:
                    forests_by_targetid[targetid].coadd(forest)
                else:
                    forests_by_targetid[targetid] = forest

                num_data += 1
        return forests_by_targetid, num_data
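
The use_non_coadded_spectra branch above is a standard inverse-variance coadd: the coadded flux is sum(ivar * flux) / sum(ivar) and the coadded inverse variance is sum(ivar). A toy illustration with made-up numbers:

    import numpy as np

    # Two hypothetical exposures of the same three pixels.
    flux = np.array([[1.0, 2.0, 3.0],
                     [3.0, 2.0, 1.0]])
    ivar = np.array([[1.0, 1.0, 3.0],
                     [1.0, 1.0, 1.0]])

    coadd_ivar = ivar.sum(axis=0)
    coadd_flux = (ivar * flux).sum(axis=0) / coadd_ivar

    print(coadd_flux)  # [2.  2.  2.5]: pixel 3 leans toward the ivar=3 exposure
    print(coadd_ivar)  # [2. 2. 4.]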
Example No. 7
    def read_data(self):
        """Read the spectra and formats its data as Forest instances.

        Return
        ------
        is_mock: bool
        False as DESI data are not mocks

        is_sv: bool
        True if all the read data belong to SV. False otherwise

        Raise
        -----
        DataError if the analysis type is PK 1D and resolution data is not present
        DataError if no quasars were found
        """
        if np.any((self.catalogue['TILEID'] < 60000)
                  & (self.catalogue['TILEID'] >= 1000)):
            is_sv = False
        else:
            is_sv = True

        coadd_name = "spectra" if self.use_non_coadded_spectra else "coadd"

        files_in = sorted(
            glob.glob(os.path.join(self.input_directory,
                                   f"**/{coadd_name}-*.fits"),
                      recursive=True))

        if "cumulative" in self.input_directory:
            petal_tile_night = [
                f"{entry['PETAL_LOC']}-{entry['TILEID']}-thru{entry['LAST_NIGHT']}"
                for entry in self.catalogue
            ]
        else:
            petal_tile_night = [
                f"{entry['PETAL_LOC']}-{entry['TILEID']}-{entry['NIGHT']}"
                for entry in self.catalogue
            ]

        # this uniqueness check is to ensure each petal/tile/night combination
        # only appears once in the filelist
        petal_tile_night_unique = np.unique(petal_tile_night)

        filenames = []
        forests_by_targetid = {}
        for file_in in files_in:
            for ptn in petal_tile_night_unique:
                if ptn in os.path.basename(file_in):
                    filenames.append(file_in)
        filenames = np.unique(filenames)

        if self.num_processors > 1:
            arguments = [(filename, self.catalogue) for filename in filenames]
            context = multiprocessing.get_context('fork')
            with context.Pool(processes=self.num_processors) as pool:
                imap_it = pool.imap(
                    DesiTileFileHandler(self.analysis_type,
                                        self.use_non_coadded_spectra,
                                        self.logger, self.input_directory),
                    arguments)
                for forests_by_targetid_aux, _ in imap_it:
                    # Merge each dict to master forests_by_targetid
                    merge_new_forest(forests_by_targetid,
                                     forests_by_targetid_aux)
        else:
            num_data = 0
            reader = DesiTileFileHandler(self.analysis_type,
                                         self.use_non_coadded_spectra,
                                         self.logger, self.input_directory)
            for index, filename in enumerate(filenames):
                forests_by_targetid_aux, num_data_aux = reader(
                    (filename, self.catalogue))
                merge_new_forest(forests_by_targetid, forests_by_targetid_aux)
                num_data += num_data_aux
                self.logger.progress(
                    f"read tile {index} of {len(filenames)}. ndata: {num_data}")

            self.logger.progress(
                f"Found {num_data} quasars in input files")

        if len(forests_by_targetid) == 0:
            raise DataError("No Quasars found, stopping here")

        self.forests = list(forests_by_targetid.values())

        return False, is_sv
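
The parallel branch works because the file handler is a callable class instance: it carries its configuration as attributes, pickles cleanly, and pool.imap feeds it one (filename, catalogue) tuple at a time. A minimal sketch of the pattern (the handler below is a hypothetical stand-in for DesiTileFileHandler):

    import multiprocessing

    class FileHandler:  # hypothetical stand-in for DesiTileFileHandler
        def __init__(self, tag):
            self.tag = tag

        def __call__(self, args):
            filename, catalogue = args
            # ... read `filename`, build forests keyed by targetid ...
            return {filename: self.tag}, 1  # (forests_by_targetid, num_data)

    if __name__ == "__main__":
        arguments = [(f"file{i}.fits", None) for i in range(4)]
        context = multiprocessing.get_context("fork")  # "fork" is Unix-only
        with context.Pool(processes=2) as pool:
            merged = {}
            for forests_aux, _ in pool.imap(FileHandler("demo"), arguments):
                merged.update(forests_aux)
        print(len(merged))  # 4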
Example No. 8
    def read_file(self, filename, catalogue):
        """Read the spectra and formats its data as Forest instances.

        Arguments
        ---------
        filename: str
        Name of the file to read

        catalogue: astropy.table.Table
        The quasar catalogue fragment associated with this file

        Return
        ------
        forests_by_targetid: dict
        Dictionary where forests are stored.

        num_data: int
        The number of instances loaded

        Raise
        -----
        DataError if the analysis type is PK 1D and resolution data is not present
        """
        try:
            hdul = fitsio.FITS(filename)
        except IOError:
            self.logger.warning(
                f"Error reading file {filename}. Ignoring file")
            return {}, 0

        fibermap = hdul['FIBERMAP'].read()

        ra = fibermap['TARGET_RA']
        dec = fibermap['TARGET_DEC']
        tile_spec = fibermap['TILEID'][0]
        if "cumulative" in self.input_directory:
            night_spec = int(filename.split('thru')[-1].split('.')[0])
        else:
            night_spec = int(filename.split('-')[-1].split('.')[0])

        colors = ['B', 'R', 'Z']
        ra = np.radians(ra)
        dec = np.radians(dec)

        petal_spec = fibermap['PETAL_LOC'][0]

        spectrographs_data = {}
        for color in colors:
            try:
                spec = {}
                spec['WAVELENGTH'] = hdul[f'{color}_WAVELENGTH'].read()
                spec['FLUX'] = hdul[f'{color}_FLUX'].read()
                spec['IVAR'] = (hdul[f'{color}_IVAR'].read() *
                                (hdul[f'{color}_MASK'].read() == 0))
                if self.analysis_type == "PK 1D":
                    if f"{color}_RESOLUTION" in hdul:
                        spec["RESO"] = hdul[f"{color}_RESOLUTION"].read()
                    else:
                        raise DataError(
                            f"Error while reading {color} band from "
                            f"{filename}. Analysis type is 'PK 1D', "
                            "but file does not contain HDU "
                            f"'{color}_RESOLUTION'")
                w = np.isnan(spec['FLUX']) | np.isnan(spec['IVAR'])
                for key in ['FLUX', 'IVAR']:
                    spec[key][w] = 0.
                spectrographs_data[color] = spec
            except OSError:
                self.logger.warning(
                    f"Error while reading {color} band from {filename}."
                    "Ignoring color.")

        hdul.close()

        if "cumulative" in self.input_directory:
            select = ((catalogue['TILEID'] == tile_spec) &
                      (catalogue['PETAL_LOC'] == petal_spec) &
                      (catalogue['LAST_NIGHT'] == night_spec))
        else:
            select = ((catalogue['TILEID'] == tile_spec) &
                      (catalogue['PETAL_LOC'] == petal_spec) &
                      (catalogue['NIGHT'] == night_spec))
        self.logger.progress(
            f'This is tile {tile_spec}, petal {petal_spec}, night {night_spec}'
        )

        forests_by_targetid, num_data = self.format_data(
            catalogue[select],
            spectrographs_data,
            fibermap["TARGETID"],
        )

        return forests_by_targetid, num_data
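
Multiplying the inverse variance by (MASK == 0) zeroes the weight of every flagged pixel in one vectorized step, so masked pixels contribute nothing downstream. A toy illustration with made-up values:

    import numpy as np

    ivar = np.array([2.0, 3.0, 4.0, 5.0])
    mask = np.array([0, 0, 4, 0])  # non-zero bits flag bad pixels

    ivar = ivar * (mask == 0)
    print(ivar)  # [2. 3. 0. 5.]: the flagged pixel carries no weight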
Example No. 9
    def read_from_spec(self, catalogue):
        """Read the spectra and formats its data as Forest instances.

        Arguments
        ---------
        catalogue: astropy.table.Table
        Table with the DRQ catalogue
        """
        self.logger.progress(f"Reading {len(catalogue)} objects")

        forests_by_thingid = {}
        #-- Loop over unique objects
        for row in catalogue:
            thingid = row['THING_ID']
            plate = row["PLATE"]
            mjd = row["MJD"]
            fiberid = row["FIBERID"]

            filename = (f"{self.input_directory}/{plate}/spec-{plate}-{mjd}-"
                        f"{fiberid:04d}.fits")
            try:
                hdul = fitsio.FITS(filename)
            except IOError:
                self.logger.warning(f"Error reading {filename}. Ignoring file")
                continue
            self.logger.progress(f"Read {filename}")

            log_lambda = np.array(hdul[1]["loglam"][:], dtype=np.float64)
            flux = np.array(hdul[1]["flux"][:], dtype=np.float64)
            ivar = (np.array(hdul[1]["ivar"][:], dtype=np.float64) *
                    (hdul[1]["and_mask"][:] == 0))

            if self.analysis_type == "BAO 3D":
                forest = SdssForest(
                    **{
                        "log_lambda": log_lambda,
                        "flux": flux,
                        "ivar": ivar,
                        "thingid": thingid,
                        "ra": row["RA"],
                        "dec": row["DEC"],
                        "z": row["Z"],
                        "plate": plate,
                        "mjd": mjd,
                        "fiberid": fiberid
                    })
            elif self.analysis_type == "PK 1D":
                # compute difference between exposures
                exposures_diff = exp_diff(hdul, log_lambda)
                # compute spectral resolution
                wdisp = hdul[1]["wdisp"][:]
                reso = spectral_resolution(wdisp, True, fiberid, log_lambda)

                forest = SdssPk1dForest(
                    **{
                        "log_lambda": log_lambda,
                        "flux": flux,
                        "ivar": ivar,
                        "thingid": thingid,
                        "ra": row["RA"],
                        "dec": row["DEC"],
                        "z": row["Z"],
                        "plate": plate,
                        "mjd": mjd,
                        "fiberid": fiberid,
                        "exposures_diff": exposures_diff,
                        "reso": reso,
                        "reso_pix": wdisp
                    })
            else:
                raise DataError(f"analysis_type = {self.analysis_type}")

            forest.rebin()
            if thingid in forests_by_thingid:
                forests_by_thingid[thingid].coadd(forest)
            else:
                forests_by_thingid[thingid] = forest

        self.forests = list(forests_by_thingid.values())
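
Note the parentheses around the and_mask comparison when building ivar: * binds tighter than ==, so without them the whole expression collapses to a boolean array instead of a masked inverse variance. A toy demonstration with made-up values:

    import numpy as np

    ivar = np.array([2.0, 3.0, 4.0])
    and_mask = np.array([0, 1, 0])

    print(ivar * (and_mask == 0))  # [2. 0. 4.]: intended masked weights
    print((ivar * and_mask) == 0)  # [ True False  True]: booleans, not weights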
Example No. 10
File: data.py Project: igmhub/picca
    def __parse_config(self, config):
        """Parse the configuration options

        Arguments
        ---------
        config: configparser.SectionProxy
        Parsed options to initialize class

        Raise
        -----
        DataError upon missing required variables
        """
        # setup Forest class variables
        wave_solution = config.get("wave solution")

        if wave_solution is None:
            raise DataError(
                "Missing argument 'wave solution' required by Data")
        if wave_solution not in ["lin", "log"]:
            raise DataError(
                "Unrecognised value for 'wave solution'. Expected either "
                f"'lin' or 'log'. Found '{wave_solution}'.")

        if wave_solution == "log":
            pixel_step = config.getfloat("delta log lambda")
            if pixel_step is None:
                raise DataError(
                    "Missing argument 'delta log lambda' required by "
                    "Data when 'wave solution' is set to 'log'")
            pixel_step_rest_frame = config.getfloat(
                "delta log lambda rest frame")
            if pixel_step_rest_frame is None:
                pixel_step_rest_frame = pixel_step
                self.logger.info(
                    "'delta log lambda rest frame' not set, using "
                    "the same value as for 'delta log lambda' "
                    f"({pixel_step_rest_frame})")
        elif wave_solution == "lin":
            pixel_step = config.getfloat("delta lambda")
            if pixel_step is None:
                raise DataError("Missing argument 'delta lambda' required by "
                                "Data when 'wave solution' is set to 'lin'")
            pixel_step_rest_frame = config.getfloat("delta lambda rest frame")
            if pixel_step_rest_frame is None:
                pixel_step_rest_frame = pixel_step
                self.logger.info(
                    "'delta lambda rest frame' not set, using "
                    f"the same value as for 'delta lambda' ({pixel_step_rest_frame})"
                )
        # this should not be reached as wave_solution is either "lin" or "log"
        # added here only in case we add another wave_solution in the future
        else:  # pragma: no cover
            raise DataError(
                "Unrecognised value for 'wave solution'. Expected either "
                f"'lin' or 'log'. Found '{wave_solution}'.")

        lambda_max = config.getfloat("lambda max")
        if lambda_max is None:
            raise DataError("Missing argument 'lambda max' required by Data")
        lambda_max_rest_frame = config.getfloat("lambda max rest frame")
        if lambda_max_rest_frame is None:
            raise DataError(
                "Missing argument 'lambda max rest frame' required by Data")
        lambda_min = config.getfloat("lambda min")
        if lambda_min is None:
            raise DataError("Missing argument 'lambda min' required by Data")
        lambda_min_rest_frame = config.getfloat("lambda min rest frame")
        if lambda_min_rest_frame is None:
            raise DataError(
                "Missing argument 'lambda min rest frame' required by Data")

        Forest.set_class_variables(lambda_min, lambda_max,
                                   lambda_min_rest_frame,
                                   lambda_max_rest_frame, pixel_step,
                                   pixel_step_rest_frame, wave_solution)

        # instance variables
        self.analysis_type = config.get("analysis type")
        if self.analysis_type is None:
            raise DataError(
                "Missing argument 'analysis type' required by Data")
        if self.analysis_type not in accepted_analysis_type:
            raise DataError("Invalid argument 'analysis type' required by "
                            f"Data. Found: '{self.analysis_type}'. Accepted "
                            "values: " + ",".join(accepted_analysis_type))

        if self.analysis_type == "PK 1D":
            lambda_abs_igm_name = config.get("lambda abs IGM")
            if lambda_abs_igm_name is None:
                raise DataError(
                    "Missing argument 'lambda abs IGM' required by Data "
                    "when 'analysys type' is 'PK 1D'")
            Pk1dForest.lambda_abs_igm = ABSORBER_IGM.get(lambda_abs_igm_name)
            if Pk1dForest.lambda_abs_igm is None:
                raise DataError(
                    "Invalid argument 'lambda abs IGM' required by "
                    f"Data. Found: '{lambda_abs_igm_name}'. Accepted "
                    "values: " + ", ".join(ABSORBER_IGM))

        self.input_directory = config.get("input directory")
        if self.input_directory is None:
            raise DataError(
                "Missing argument 'input directory' required by Data")

        self.min_num_pix = config.getint("minimum number pixels in forest")
        if self.min_num_pix is None:
            raise DataError(
                "Missing argument 'minimum number pixels in forest' "
                "required by Data")

        self.out_dir = config.get("out dir")
        if self.out_dir is None:
            raise DataError("Missing argument 'out dir' required by Data")

        self.rejection_log_file = config.get("rejection log file")
        if self.rejection_log_file is None:
            raise DataError(
                "Missing argument 'rejection log file' required by Data")
        if "/" in self.rejection_log_file:
            raise DataError("Error constructing Data. "
                            "'rejection log file' should not incude folders. "
                            f"Found: {self.rejection_log_file}")
        if not (self.rejection_log_file.endswith(".fits")
                or self.rejection_log_file.endswith(".fits.gz")):
            raise DataError("Error constructing Data. Invalid extension for "
                            "'rejection log file'. Filename "
                            "should en with '.fits' or '.fits.gz'. Found "
                            f"'{self.rejection_log_file}'")

        if self.analysis_type == "BAO 3D":
            self.min_snr = config.getfloat("minimal snr bao3d")
        elif self.analysis_type == "PK 1D":
            self.min_snr = config.getfloat("minimal snr pk1d")
        # this should not be reached as analysis_type is either "BAO 3D" or
        # "PK 1D" added here only in case we add another analysis_type in the
        # future
        else:  # pragma: no cover
            raise DataError("Invalid argument 'analysis type' required by "
                            f"Data. Found: '{self.analysis_type}'. Accepted "
                            "values: " + ",".join(accepted_analysis_type))
        if self.min_snr is None:
            raise DataError(
                "Missing argument 'minimal snr bao3d' (if 'analysis type' = "
                "'BAO 3D') or 'minimal snr pk1d' (if 'analysis type' = "
                "'PK 1D') required by Data")