def runDataRef(self, patchRefList):
    """Match visits to coadd and write output.

    Visits to match are chosen by taking all input coadd patches
    (collected from the requested tract) and querying for all visits used
    to construct each patch coadd; the set of visits put in the match
    table is the union over all patches.

    Parameters
    ----------
    patchRefList : `list`
        List of patch datarefs from which visits will be selected.

    Returns
    -------
    matchDf : `pandas.DataFrame`
        Dataframe of match data. Column index is multi-level, with the
        first level being ``['matchId', 'distance']`` and the second
        level being the visit number.
    """
    butler = patchRefList[0].getButler()
    tract = patchRefList[0].dataId["tract"]
    filt = patchRefList[0].dataId["filter"]

    # Collect all visits that overlap any part of the requested tract.
    allVisits = set()
    for patchRef in patchRefList:
        try:
            exp = butler.get("deepCoadd_calexp", dataId=patchRef.dataId)
            allVisits.update(exp.getInfo().getCoaddInputs().visits["id"])
        except NoResults:
            # Patches with no coadd simply contribute no visits.
            pass
    # Sort for deterministic column ordering and log output; iterating a
    # bare set would make the column order vary from run to run.
    allVisits = sorted(allVisits)

    self.log.info("matching {} visits to tract {}: {}".format(
        len(allVisits), tract, allVisits))

    # Match
    columns = ["coord_ra", "coord_dec"]
    coaddDf = (butler.get(self.inputDataset, tract=tract, filter=filt,
                          subdir="").toDataFrame(columns=columns))
    # First level: quantity name; second level: visit number.
    column_index = pd.MultiIndex.from_product([["matchId", "distance"], allVisits])
    matchDf = pd.DataFrame(columns=column_index, index=coaddDf.index)
    for i, visit in enumerate(allVisits):
        try:
            visitDf = (butler.get("analysisVisitTable", tract=tract,
                                  filter=filt, visit=visit,
                                  subdir="").toDataFrame(columns=columns))
        except NoResults:
            self.log.info(
                f"({i+1} of {len(allVisits)}) visit {visit}: "
                "analysisVisitTable not available")
            continue
        good, ids, distance = self.matchCats(coaddDf, visitDf)
        matchDf.loc[good, ("matchId", visit)] = ids
        matchDf.loc[good, ("distance", visit)] = distance
        self.log.info("({} of {}) visit {}: {} sources matched.".format(
            i + 1, len(allVisits), visit, good.sum()))

    butler.put(ParquetTable(dataFrame=matchDf), self.outputDataset,
               tract=tract, filter=filt)
    return matchDf
def runDataRef(self, dataRef):
    """Add required magnitudes in each band and run the classifier,
    read in the reference catalog and the truth table reference object.

    Parameters
    ----------
    dataRef : `lsst.daf.persistence.butlerSubset.ButlerDataRef`
        Data reference defining the patch to be turned into features for
        training the classifiers
        Used to access the following data products:
            ``deepCoadd_obj`` produced by `writeObjectTask`
            ``deepCoadd_calexp_calib``
            ``HST_truth_table_star_galaxy_refCat``

    Notes
    -----
    Check that all the required magnitudes are in the file, then add the
    extra magnitudes needed to create the features needed to train the
    classifier for the given patch and tract. The magnitude columns are
    calculated from the ``self.config.modelColName`` columns in each band
    using the calibration from the ``deepCoadd_calexp_calib``. Operates
    on a single patch.

    Read in the associated reference catalog and the given truth table.
    The default is a catalog from Leauthaud et al 2007 which uses HST
    COSMOS data.
    """
    # Per-band measurement catalog for this patch; columns are a
    # two-level index of (filter, column).
    cat = dataRef.get("deepCoadd_obj").toDataFrame({"dataset": "meas"})
    # Reference sky positions come from the band-independent "ref"
    # dataset; a single filter is named only to satisfy the dataId.
    refInfo = {
        "dataset": "ref",
        "column": ["coord_ra", "coord_dec"],
        "filter": "HSC-G"
    }
    refCat = dataRef.get("deepCoadd_obj").toDataFrame(refInfo)
    filters = self.config.filters
    # First level of the column MultiIndex is the filter name.
    filtersInCat = set(cat.columns.get_level_values(0))
    if not filtersInCat >= set(filters):
        missingFilters = list(set(filters) - filtersInCat)
        raise RuntimeError(
            "Not all required filters are present in the catalog: \
missing {}.".format(missingFilters))
    cat = self.addMagnitudes(cat, dataRef, filters)
    # The truth table is served through the indexed reference-object
    # loader under the dataset name configured here.
    config = LoadIndexedReferenceObjectsTask.ConfigClass()
    config.ref_dataset_name = 'HST_truth_table_star_galaxy_refCat'
    butler = dataRef.getButler()
    truthRefObj = LoadIndexedReferenceObjectsTask(butler, config=config)
    cat = self.run(cat, filters, truthRefObj, refCat)
    cat = ParquetTable(dataFrame=cat)
    dataRef.put(cat, "deepCoadd_sg_features")
def runDataRef(self, dataRefs):
    """Read in all the patch files for each dataRef and then concatenate
    them into one data frame. Then take these data frames and concatenate
    them together from all the input reruns.

    Parameters
    ----------
    dataRefs : `dict`
        Dictionary of names of reruns and their associated dataRefs
        The dataRefs are then used to access the following data products:
            ``deepCoadd_sg_features`` produced by `StarGalaxyFeaturesTask`

    Notes
    -----
    The dict of dataRefs contains the dataRef from each rerun and an
    associated name that can be used to distinguish between the dataRefs
    if required.
    """
    rerunDataFrames = []
    for dataRefName, dataRef in dataRefs.items():
        skymap = dataRef.get("deepCoadd_skyMap")
        tractInfo = skymap[dataRef.dataId["tract"]]
        patchFrames = []
        for patchInfo in tractInfo:
            # Patch dataIds are keyed by the "x,y" index string.
            idx = patchInfo.getIndex()
            patch = str(idx[0]) + "," + str(idx[1])
            try:
                frame = dataRef.get("deepCoadd_sg_features",
                                    patch=patch).toDataFrame()
            except lsst.daf.persistence.butlerExceptions.NoResults:
                # A missing patch table is expected; warn and move on.
                self.log.warn(
                    "No 'deepCoadd_sg_features' found for patch: {} in dataRef "
                    "named: {}".format(patch, dataRefName))
                continue
            patchFrames.append(frame)
        # Stack every available patch of this rerun into one frame.
        rerunDataFrames.append(pd.concat(patchFrames))
    tractDataFrame = self.run(rerunDataFrames, self.config.sourceTypes)
    dataRefs[self.config.outputRerunName].put(
        ParquetTable(dataFrame=tractDataFrame), "deepCoadd_sg_features_tract")
def getParq(self, filename, df):
    """Return a pair of ParquetTables: one file-backed, one in-memory.

    Parameters
    ----------
    filename : `str`
        Path of an existing parquet file to wrap.
    df : `pandas.DataFrame`
        Frame to wrap as an in-memory ParquetTable.
    """
    fromFile = ParquetTable(filename)
    fromFrame = ParquetTable(dataFrame=df)
    return fromFile, fromFrame
def simulateParquet(self, dataDict):
    """Build an in-memory ParquetTable from a dict of column data.

    Parameters
    ----------
    dataDict : `dict`
        Mapping of column name to column values, as accepted by
        `pandas.DataFrame`.
    """
    return ParquetTable(dataFrame=pd.DataFrame(dataDict))
def run(self, diaSourceCat, diffIm, band, ccdVisitId, funcs=None):
    """Convert input catalog to ParquetTable/Pandas and run functors.

    Additionally, add new columns for stripping information from the
    exposure and into the DiaSource catalog.

    Parameters
    ----------
    diaSourceCat : `lsst.afw.table.SourceCatalog`
        Catalog of sources measured on the difference image.
    diffIm : `lsst.afw.image.Exposure`
        Result of subtracting template and science images.
    band : `str`
        Filter band of the science image.
    ccdVisitId : `int`
        Identifier for this detector+visit.
    funcs : `lsst.pipe.tasks.functors.Functors`
        Functors to apply to the catalog's columns.
        NOTE: this argument is not used by the body; ``self.funcs`` is
        what gets applied.

    Returns
    -------
    results : `lsst.pipe.base.Struct`
        Results struct with components.

        - ``diaSourceTable`` : Catalog of DiaSources with calibrated
          values and renamed columns.
          (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
    """
    self.log.info(
        "Transforming/standardizing the DiaSource table ccdVisitId: %i",
        ccdVisitId)

    diaSourceDf = diaSourceCat.asAstropy().to_pandas()

    def getSignificance():
        """Return the significance value of the first peak in each
        source footprint."""
        nRows = len(diaSourceDf)
        significances = np.full(nRows, np.nan)
        for idx in range(nRows):
            record = diaSourceCat[idx]
            # Sky sources are dropped below, so leave them as NaN.
            if self.config.doRemoveSkySources and record["sky_source"]:
                continue
            peaks = record.getFootprint().peaks
            if "significance" in peaks.schema:
                significances[idx] = peaks[0]["significance"]
        return significances

    diaSourceDf["snr"] = getSignificance()

    if self.config.doRemoveSkySources:
        diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]

    # Columns carried over from the exposure / task configuration.
    diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
    diaSourceDf["ccdVisitId"] = ccdVisitId
    diaSourceDf["filterName"] = band
    visitInfo = diffIm.getInfo().getVisitInfo()
    diaSourceDf["midPointTai"] = visitInfo.getDate().get(system=DateTime.MJD)
    diaSourceDf["diaObjectId"] = 0
    diaSourceDf["ssObjectId"] = 0

    if self.config.doPackFlags:
        # either bitpack the flags
        self.bitPackFlags(diaSourceDf)
    else:
        # or add the individual flag functors
        self.addUnpackedFlagFunctors()
        # and remove the packed flag functor
        if 'flags' in self.funcs.funcDict:
            del self.funcs.funcDict['flags']

    df = self.transform(band,
                        ParquetTable(dataFrame=diaSourceDf),
                        self.funcs,
                        dataId=None).df
    return pipeBase.Struct(diaSourceTable=df, )
def runDataRef(self, dataRef):
    """Add required magnitudes in each band and run the classifier

    Parameters
    ----------
    dataRef : `lsst.daf.persistence.butlerSubset.ButlerDataRef`
        Data reference defining the patch to be classified
        Used to access the following data products:
            deepCoadd_obj produced by writeObjectTableTask
            deepCoadd_calexp_calib

    Notes
    -----
    Check that all the required filters are in the file, then add the
    extra magnitude columns needed to the table and run the classifier
    for the given patch and tract. The magnitude columns are calculated
    from the ``self.config.modelColName`` columns in each band using the
    calibration from the ``deepCoadd_calexp_calib``. Operates on a
    single patch.
    """
    # Per-band measurement catalog; columns are (filter, column) pairs.
    cat = dataRef.get("deepCoadd_obj").toDataFrame({"dataset": "meas"})
    # To Do: DM-14855 - Train classifiers on other datasets
    # For now raise a not implemented error for other cameras, eventually needs other trained classifiers
    cameraName = dataRef.get("camera").getName()
    if cameraName != "HSC":
        raise NotImplementedError("Currently only HSC is supported")
    # To Do: DM-14539 - Move this to somewhere else
    # put this into /datasets/hsc then the butler will get it
    # Add classifier_pickle to obs_base datasets, copy deepCoadd_skyMap.
    # Filters and column headings come from the classifier pickle
    clfDictMorph = dataRef.get("starGalaxy_morphOnlyClassifier", label=self.config.label)
    filters = clfDictMorph["filters"]
    clfMorph = clfDictMorph["clf"]
    colsToUseMorph = clfDictMorph["colsToUse"]
    clfDict = dataRef.get("starGalaxy_classifier", label=self.config.label)
    # NOTE(review): this rebinds ``filters``, discarding the morphology
    # classifier's filter list — only the full classifier's filters are
    # validated below. Presumably both pickles carry the same filters;
    # confirm, or validate the union of the two.
    filters = clfDict["filters"]
    clf = clfDict["clf"]
    colsToUse = clfDict["colsToUse"]
    filtersInCat = set(cat.columns.get_level_values(0))
    if not filtersInCat >= set(filters):
        missingFilters = list(set(filters) - filtersInCat)
        raise RuntimeError(
            "Not all required filters are present in the catalog: \
missing {}.".format(missingFilters))
    # Columns needed in every band to build the classifier features:
    # shape moments plus PSF/model fluxes and their errors.
    colsRequired = {
        self.config.seeingColName + "_xx",
        self.config.seeingColName + "_xy",
        self.config.seeingColName + "_yy",
        self.config.psfColName,
        self.config.modelColName,
        self.config.psfColName + "Err",
        self.config.modelColName + "Err"
    }
    for band in filters:
        colsInCat = set(cat[band].columns)
        missingCols = list(set(colsRequired) - colsInCat)
        if len(missingCols) > 0:
            raise RuntimeError(
                "Not all required columns are present in catalog: \
missing {} in {}.".format(
                    missingCols, band))
    cat = self.addMagnitudes(cat, dataRef, filters)
    cat = self.run(cat, filters, clfMorph, colsToUseMorph, clf, colsToUse)
    cat = ParquetTable(dataFrame=cat)
    dataRef.put(cat, "deepCoadd_sg")