def runDataRef(self, patchRefList):
    """Match visits to coadd and write output.

    Visits to match are chosen by taking all input coadd patches
    (collected from the requested tract) and querying for all visits used
    to construct each patch coadd; the set of visits put in the match
    table is the union over all patches.

    Parameters
    ----------
    patchRefList : `list`
        List of patch datarefs from which visits will be selected.

    Returns
    -------
    matchDf : `pandas.DataFrame`
        Dataframe of match data. Column index is multi-level, with the
        first level being ``['matchId', 'distance']`` and the second
        level being the visit number.
    """
    butler = patchRefList[0].getButler()
    tract = patchRefList[0].dataId["tract"]
    filt = patchRefList[0].dataId["filter"]

    # Collect all visits that overlap any part of the requested tract.
    allVisits = set()
    for patchRef in patchRefList:
        try:
            exp = butler.get("deepCoadd_calexp", dataId=patchRef.dataId)
            allVisits.update(exp.getInfo().getCoaddInputs().visits["id"])
        except NoResults:
            # Patches with no coadd simply contribute no visits.
            pass
    # Sort for deterministic column ordering and log output; iterating a
    # bare set would make the column order vary from run to run.
    allVisits = sorted(allVisits)

    self.log.info("matching {} visits to tract {}: {}".format(
        len(allVisits), tract, allVisits))

    # Match
    columns = ["coord_ra", "coord_dec"]
    coaddDf = (butler.get(self.inputDataset, tract=tract, filter=filt,
                          subdir="").toDataFrame(columns=columns))
    # First level: quantity name; second level: visit number.
    column_index = pd.MultiIndex.from_product([["matchId", "distance"], allVisits])
    matchDf = pd.DataFrame(columns=column_index, index=coaddDf.index)
    for i, visit in enumerate(allVisits):
        try:
            visitDf = (butler.get("analysisVisitTable", tract=tract,
                                  filter=filt, visit=visit,
                                  subdir="").toDataFrame(columns=columns))
        except NoResults:
            self.log.info(
                f"({i+1} of {len(allVisits)}) visit {visit}: "
                "analysisVisitTable not available")
            continue
        good, ids, distance = self.matchCats(coaddDf, visitDf)
        matchDf.loc[good, ("matchId", visit)] = ids
        matchDf.loc[good, ("distance", visit)] = distance
        self.log.info("({} of {}) visit {}: {} sources matched.".format(
            i + 1, len(allVisits), visit, good.sum()))

    butler.put(ParquetTable(dataFrame=matchDf), self.outputDataset,
               tract=tract, filter=filt)
    return matchDf
def runDataRef(self, dataRef):
    """Add required magnitudes in each band and run the classifier,
    read in the reference catalog and the truth table reference object.

    Parameters
    ----------
    dataRef : `lsst.daf.persistence.butlerSubset.ButlerDataRef`
        Data reference defining the patch to be turned into features for
        training the classifiers
        Used to access the following data products:
            ``deepCoadd_obj`` produced by `writeObjectTask`
            ``deepCoadd_calexp_calib``
            ``HST_truth_table_star_galaxy_refCat``

    Notes
    -----
    Check that all the required magnitudes are in the file, then add the
    extra magnitudes needed to create the features needed to train the
    classifier for the given patch and tract. The magnitude columns are
    calculated from the ``self.config.modelColName`` columns in each band
    using the calibration from the ``deepCoadd_calexp_calib``. Operates
    on a single patch.

    Read in the associated reference catalog and the given truth table.
    The default is a catalog from Leauthaud et al 2007 which uses HST
    COSMOS data.
    """
    # Per-band measurement catalog for this patch; columns are a
    # two-level index of (filter, column).
    cat = dataRef.get("deepCoadd_obj").toDataFrame({"dataset": "meas"})
    # Reference sky positions come from the band-independent "ref"
    # dataset; a single filter is named only to satisfy the dataId.
    refInfo = {
        "dataset": "ref",
        "column": ["coord_ra", "coord_dec"],
        "filter": "HSC-G"
    }
    refCat = dataRef.get("deepCoadd_obj").toDataFrame(refInfo)
    filters = self.config.filters
    # First level of the column MultiIndex is the filter name.
    filtersInCat = set(cat.columns.get_level_values(0))
    if not filtersInCat >= set(filters):
        missingFilters = list(set(filters) - filtersInCat)
        raise RuntimeError(
            "Not all required filters are present in the catalog: \
missing {}.".format(missingFilters))
    cat = self.addMagnitudes(cat, dataRef, filters)
    # The truth table is served through the indexed reference-object
    # loader under the dataset name configured here.
    config = LoadIndexedReferenceObjectsTask.ConfigClass()
    config.ref_dataset_name = 'HST_truth_table_star_galaxy_refCat'
    butler = dataRef.getButler()
    truthRefObj = LoadIndexedReferenceObjectsTask(butler, config=config)
    cat = self.run(cat, filters, truthRefObj, refCat)
    cat = ParquetTable(dataFrame=cat)
    dataRef.put(cat, "deepCoadd_sg_features")
def runDataRef(self, dataRefs):
    """Read in all the patch files for each dataRef and then concatenate
    them into one data frame. Then take these data frames and concatenate
    them together from all the input reruns.

    Parameters
    ----------
    dataRefs : `dict`
        Dictionary of names of reruns and their associated dataRefs
        The dataRefs are then used to access the following data products:
            ``deepCoadd_sg_features`` produced by `StarGalaxyFeaturesTask`

    Notes
    -----
    The dict of dataRefs contains the dataRef from each rerun and an
    associated name that can be used to distinguish between the dataRefs
    if required.
    """
    rerunDataFrames = []
    for dataRefName, dataRef in dataRefs.items():
        skymap = dataRef.get("deepCoadd_skyMap")
        tractInfo = skymap[dataRef.dataId["tract"]]
        patchFrames = []
        for patchInfo in tractInfo:
            # Patch dataIds are keyed by the "x,y" index string.
            idx = patchInfo.getIndex()
            patch = str(idx[0]) + "," + str(idx[1])
            try:
                frame = dataRef.get("deepCoadd_sg_features",
                                    patch=patch).toDataFrame()
            except lsst.daf.persistence.butlerExceptions.NoResults:
                # A missing patch table is expected; warn and move on.
                self.log.warn(
                    "No 'deepCoadd_sg_features' found for patch: {} in dataRef "
                    "named: {}".format(patch, dataRefName))
                continue
            patchFrames.append(frame)
        # Stack every available patch of this rerun into one frame.
        rerunDataFrames.append(pd.concat(patchFrames))
    tractDataFrame = self.run(rerunDataFrames, self.config.sourceTypes)
    dataRefs[self.config.outputRerunName].put(
        ParquetTable(dataFrame=tractDataFrame), "deepCoadd_sg_features_tract")
def getParq(self, filename, df):
    """Return a pair of ParquetTables: one file-backed, one in-memory.

    Parameters
    ----------
    filename : `str`
        Path of an existing parquet file to wrap.
    df : `pandas.DataFrame`
        Frame to wrap as an in-memory ParquetTable.
    """
    fromFile = ParquetTable(filename)
    fromFrame = ParquetTable(dataFrame=df)
    return fromFile, fromFrame
def simulateParquet(self, dataDict):
    """Build an in-memory ParquetTable from a dict of column data.

    Parameters
    ----------
    dataDict : `dict`
        Mapping of column name to column values, as accepted by
        `pandas.DataFrame`.
    """
    return ParquetTable(dataFrame=pd.DataFrame(dataDict))
def run(self, diaSourceCat, diffIm, band, ccdVisitId, funcs=None):
    """Convert input catalog to ParquetTable/Pandas and run functors.

    Additionally, add new columns for stripping information from the
    exposure and into the DiaSource catalog.

    Parameters
    ----------
    diaSourceCat : `lsst.afw.table.SourceCatalog`
        Catalog of sources measured on the difference image.
    diffIm : `lsst.afw.image.Exposure`
        Result of subtracting template and science images.
    band : `str`
        Filter band of the science image.
    ccdVisitId : `int`
        Identifier for this detector+visit.
    funcs : `lsst.pipe.tasks.functors.Functors`
        Functors to apply to the catalog's columns.
        NOTE: this argument is not used by the body; ``self.funcs`` is
        what gets applied.

    Returns
    -------
    results : `lsst.pipe.base.Struct`
        Results struct with components.

        - ``diaSourceTable`` : Catalog of DiaSources with calibrated
          values and renamed columns.
          (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
    """
    self.log.info(
        "Transforming/standardizing the DiaSource table ccdVisitId: %i",
        ccdVisitId)

    diaSourceDf = diaSourceCat.asAstropy().to_pandas()

    def getSignificance():
        """Return the significance value of the first peak in each
        source footprint."""
        nRows = len(diaSourceDf)
        significances = np.full(nRows, np.nan)
        for idx in range(nRows):
            record = diaSourceCat[idx]
            # Sky sources are dropped below, so leave them as NaN.
            if self.config.doRemoveSkySources and record["sky_source"]:
                continue
            peaks = record.getFootprint().peaks
            if "significance" in peaks.schema:
                significances[idx] = peaks[0]["significance"]
        return significances

    diaSourceDf["snr"] = getSignificance()

    if self.config.doRemoveSkySources:
        diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]

    # Columns carried over from the exposure / task configuration.
    diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
    diaSourceDf["ccdVisitId"] = ccdVisitId
    diaSourceDf["filterName"] = band
    visitInfo = diffIm.getInfo().getVisitInfo()
    diaSourceDf["midPointTai"] = visitInfo.getDate().get(system=DateTime.MJD)
    diaSourceDf["diaObjectId"] = 0
    diaSourceDf["ssObjectId"] = 0

    if self.config.doPackFlags:
        # either bitpack the flags
        self.bitPackFlags(diaSourceDf)
    else:
        # or add the individual flag functors
        self.addUnpackedFlagFunctors()
        # and remove the packed flag functor
        if 'flags' in self.funcs.funcDict:
            del self.funcs.funcDict['flags']

    df = self.transform(band,
                        ParquetTable(dataFrame=diaSourceDf),
                        self.funcs,
                        dataId=None).df
    return pipeBase.Struct(diaSourceTable=df, )
def runDataRef(self, dataRef):
    """Add required magnitudes in each band and run the classifier

    Parameters
    ----------
    dataRef : `lsst.daf.persistence.butlerSubset.ButlerDataRef`
        Data reference defining the patch to be classified
        Used to access the following data products:
            deepCoadd_obj produced by writeObjectTableTask
            deepCoadd_calexp_calib

    Notes
    -----
    Check that all the required filters are in the file, then add the
    extra magnitude columns needed to the table and run the classifier
    for the given patch and tract. The magnitude columns are calculated
    from the ``self.config.modelColName`` columns in each band using the
    calibration from the ``deepCoadd_calexp_calib``. Operates on a
    single patch.
    """
    # Per-band measurement catalog; columns are (filter, column) pairs.
    cat = dataRef.get("deepCoadd_obj").toDataFrame({"dataset": "meas"})
    # To Do: DM-14855 - Train classifiers on other datasets
    # For now raise a not implemented error for other cameras, eventually needs other trained classifiers
    cameraName = dataRef.get("camera").getName()
    if cameraName != "HSC":
        raise NotImplementedError("Currently only HSC is supported")
    # To Do: DM-14539 - Move this to somewhere else
    # put this into /datasets/hsc then the butler will get it
    # Add classifier_pickle to obs_base datasets, copy deepCoadd_skyMap.
    # Filters and column headings come from the classifier pickle
    clfDictMorph = dataRef.get("starGalaxy_morphOnlyClassifier", label=self.config.label)
    filters = clfDictMorph["filters"]
    clfMorph = clfDictMorph["clf"]
    colsToUseMorph = clfDictMorph["colsToUse"]
    clfDict = dataRef.get("starGalaxy_classifier", label=self.config.label)
    # NOTE(review): this rebinds ``filters``, discarding the morphology
    # classifier's filter list — only the full classifier's filters are
    # validated below. Presumably both pickles carry the same filters;
    # confirm, or validate the union of the two.
    filters = clfDict["filters"]
    clf = clfDict["clf"]
    colsToUse = clfDict["colsToUse"]
    filtersInCat = set(cat.columns.get_level_values(0))
    if not filtersInCat >= set(filters):
        missingFilters = list(set(filters) - filtersInCat)
        raise RuntimeError(
            "Not all required filters are present in the catalog: \
missing {}.".format(missingFilters))
    # Columns needed in every band to build the classifier features:
    # shape moments plus PSF/model fluxes and their errors.
    colsRequired = {
        self.config.seeingColName + "_xx",
        self.config.seeingColName + "_xy",
        self.config.seeingColName + "_yy",
        self.config.psfColName,
        self.config.modelColName,
        self.config.psfColName + "Err",
        self.config.modelColName + "Err"
    }
    for band in filters:
        colsInCat = set(cat[band].columns)
        missingCols = list(set(colsRequired) - colsInCat)
        if len(missingCols) > 0:
            raise RuntimeError(
                "Not all required columns are present in catalog: \
missing {} in {}.".format(
                    missingCols, band))
    cat = self.addMagnitudes(cat, dataRef, filters)
    cat = self.run(cat, filters, clfMorph, colsToUseMorph, clf, colsToUse)
    cat = ParquetTable(dataFrame=cat)
    dataRef.put(cat, "deepCoadd_sg")