Example #1
    def run(self, tractPatchRefList, butler, selectIdList=[]):
        """!Determine which tracts are non-empty before processing

        @param tractPatchRefList: List of tracts and patches to include in the coaddition
        @param butler: butler reference object
        @param selectIdList: List of data Ids (e.g. visit, ccd) to consider when making the coadd
        @return List of results from self.runTract for each member of tractPatchRefList
        """
        pool = Pool("tracts")
        pool.storeSet(butler=butler, skymap=butler.get(self.config.coaddName + "Coadd_skyMap"))
        tractIdList = []
        for patchRefList in tractPatchRefList:
            tractSet = set([patchRef.dataId["tract"] for patchRef in patchRefList])
            assert len(tractSet) == 1
            tractIdList.append(tractSet.pop())

        selectDataList = [data for data in pool.mapNoBalance(self.readSelection, selectIdList) if
                          data is not None]
        nonEmptyList = pool.mapNoBalance(self.checkTract, tractIdList, selectDataList)
        tractPatchRefList = [patchRefList for patchRefList, nonEmpty in
                             zip(tractPatchRefList, nonEmptyList) if nonEmpty]
        self.log.info("Non-empty tracts (%d): %s" % (len(tractPatchRefList),
                                                     [patchRefList[0].dataId["tract"] for patchRefList in
                                                      tractPatchRefList]))

        # Install the dataRef in the selectDataList
        for data in selectDataList:
            data.dataRef = getDataRef(butler, data.dataId, "calexp")

        # Process the non-empty tracts
        return [self.runTract(patchRefList, butler, selectDataList) for patchRefList in tractPatchRefList]
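The two mapNoBalance calls above imply a contract for the slave-side methods: readSelection returns selection data for one exposure or None (the master filters the Nones out), and checkTract returns a boolean per tract. A minimal sketch of that contract, assuming the lsst.ctrl.pool convention that slave methods receive the node cache as their first argument; the dataset name "calexp_md" and the overlaps() helper are hypothetical, and Struct is lsst.pipe.base.Struct as in Example #6:

    def readSelection(self, cache, selectId):
        """Read metadata for one candidate input; return None to drop it."""
        try:
            md = cache.butler.get("calexp_md", selectId)  # hypothetical dataset name
        except Exception:
            return None  # None results are filtered out by the master
        return Struct(dataId=selectId, md=md)

    def checkTract(self, cache, tractId, selectDataList):
        """Return True if any selected input overlaps this tract."""
        tractInfo = cache.skymap[tractId]
        return any(overlaps(tractInfo, data.md) for data in selectDataList)  # overlaps() assumed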
Example #2
    def runDataRef(self, expRef):
        """Generate an image of the entire visit

        Only the master node executes this method; it controls the slave nodes,
        which do the data retrieval.

        Parameters
        ----------
        expRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for exposure.
        """
        pool = Pool()

        if not self._storedButler:
            pool.storeSet(butler=expRef.getButler())

        with self.logOperation("processing %s" % (expRef.dataId, )):
            camera = expRef.get("camera")
            dataIdList = [
                ccdRef.dataId for ccdRef in expRef.subItems("ccd")
                if ccdRef.datasetExists("calexp")
            ]

            exposures = pool.map(self.readImage, dataIdList)
            exposures = dict(keyValue for keyValue in exposures
                             if keyValue is not None)
            image = makeCameraImage(camera, exposures, self.config.binning)
            expRef.put(image, "calexp_camera")
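Because the master builds a dict from the map results and drops Nones, the slave-side readImage must return a (detectorId, image) pair, or None for CCDs that cannot be read. A minimal sketch under those assumptions (the dataset name and the binning call are illustrative, not the actual implementation):

    import lsst.afw.math as afwMath

    def readImage(self, cache, dataId):
        """Load and bin one CCD image; return (ccdId, binnedImage) or None."""
        try:
            exposure = cache.butler.get("calexp", dataId)
        except Exception:
            return None  # dropped by the master's dict() filter
        binned = afwMath.binImage(exposure.getMaskedImage(), self.config.binning)
        return dataId["ccd"], binned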
Example #3
    def runDataRef(self, patchRefList, selectDataList=[]):
        """!Run association processing on coadds

        Only the master node runs this method.

        @param patchRefList:  Data references to run measurement
        @param selectDataList: List of SelectStruct for inputs
        """
        for patchRef in patchRefList:
            if patchRef:
                butler = patchRef.getButler()
                break
        else:
            raise RuntimeError("No valid patches")
        pool = Pool("all")
        pool.cacheClear()
        pool.storeSet(butler=butler)

        # Group all filters by patch
        patches = {}
        tract = None
        for patchRef in patchRefList:
            dataId = patchRef.dataId
            if tract is None:
                tract = dataId["tract"]
            else:
                assert tract == dataId["tract"]

            patch = dataId["patch"]
            if patch not in patches:
                patches[patch] = []
            patches[patch].append(dataId)

        pool.map(self.runAssociation, patches.values(), selectDataList)
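The group-by-patch loop above recurs almost verbatim in Examples #4, #11 and #19. A behavior-preserving sketch of the same grouping with collections.defaultdict:

    from collections import defaultdict

    patches = defaultdict(list)
    tractSet = set()
    for patchRef in patchRefList:
        dataId = patchRef.dataId
        tractSet.add(dataId["tract"])
        patches[dataId["patch"]].append(dataId)
    assert len(tractSet) <= 1, "all patches must belong to a single tract"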
Example #4
    def runDataRef(self, patchRefList):
        """!Combine forced diaObjects into a single catalog to construct light curves

        Only the master node runs this method.

        @param patchRefList:  Data references to run measurement
        """
        for patchRef in patchRefList:
            if patchRef:
                butler = patchRef.getButler()
                break
        else:
            raise RuntimeError("No valid patches")
        pool = Pool("all")
        pool.cacheClear()
        pool.storeSet(butler=butler)

        # Group all filters by patch
        patches = {}
        tract = None
        for patchRef in patchRefList:
            dataId = patchRef.dataId
            if tract is None:
                tract = dataId["tract"]
            else:
                assert tract == dataId["tract"]

            patch = dataId["patch"]
            if patch not in patches:
                patches[patch] = []
            patches[patch].append(dataId)

        pool.map(self.runCombine, patches.values())
Example #5
    def run(self, tractPatchRefList, butler, selectIdList=[]):
        """!Determine which tracts are non-empty before processing

        @param tractPatchRefList: List of tracts and patches to include in the coaddition
        @param butler: butler reference object
        @param selectIdList: List of data Ids (e.g. visit, ccd) to consider when making the coadd
        @return List of results from self.runTract for each member of tractPatchRefList
        """
        pool = Pool("tracts")
        pool.storeSet(butler=butler, skymap=butler.get(
            self.config.coaddName + "Coadd_skyMap"))
        tractIdList = []
        for patchRefList in tractPatchRefList:
            tractSet = set([patchRef.dataId["tract"]
                            for patchRef in patchRefList])
            assert len(tractSet) == 1
            tractIdList.append(tractSet.pop())

        selectDataList = [data for data in pool.mapNoBalance(self.readSelection, selectIdList) if
                          data is not None]
        nonEmptyList = pool.mapNoBalance(
            self.checkTract, tractIdList, selectDataList)
        tractPatchRefList = [patchRefList for patchRefList, nonEmpty in
                             zip(tractPatchRefList, nonEmptyList) if nonEmpty]
        self.log.info("Non-empty tracts (%d): %s" % (len(tractPatchRefList),
                                                     [patchRefList[0].dataId["tract"] for patchRefList in
                                                      tractPatchRefList]))

        # Install the dataRef in the selectDataList
        for data in selectDataList:
            data.dataRef = getDataRef(butler, data.dataId, "calexp")

        # Process the non-empty tracts
        return [self.runTract(patchRefList, butler, selectDataList) for patchRefList in tractPatchRefList]
Example #6
    def run(self, patchRefList, butler, selectDataList=[]):
        """!Run stacking on a tract

        This method only runs on the master node.

        @param patchRefList: List of patch data references for tract
        @param butler: Data butler
        @param selectDataList: List of SelectStruct for inputs
        """
        pool = Pool("stacker")
        pool.cacheClear()
        pool.storeSet(butler=butler,
                      warpType=self.config.coaddName + "Coadd_directWarp",
                      coaddType=self.config.coaddName + "Coadd")
        patchIdList = [patchRef.dataId for patchRef in patchRefList]

        selectedData = pool.map(self.warp, patchIdList, selectDataList)
        if self.config.doBackgroundReference:
            self.backgroundReference.runDataRef(patchRefList, selectDataList)

        def refNamer(patchRef):
            return tuple(map(int, patchRef.dataId["patch"].split(",")))

        lookup = dict(zip(map(refNamer, patchRefList), selectedData))
        coaddData = [
            Struct(patchId=patchRef.dataId,
                   selectDataList=lookup[refNamer(patchRef)])
            for patchRef in patchRefList
        ]
        pool.map(self.coadd, coaddData)
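refNamer converts the comma-separated patch identifier into a hashable tuple, which the lookup dict uses to pair each patch with the selection data returned by the warp step. For example:

    # Same parsing as refNamer above, applied to a bare patch string.
    patchId = "5,7"
    assert tuple(map(int, patchId.split(","))) == (5, 7)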
Example #7
    def runDataRef(self, rawRefList, butler):
        pool = Pool("visits")
        pool.cacheClear()
        pool.storeSet(butler=butler)

        # Make unique combinations of visit and CCD number.
        # The shift width of 4 needs to be replaced by a config parameter.
        visitCcdIdList = set()
        for rawRef in rawRefList:
            visitCcdIdList.add((rawRef.dataId['visit'] << 4) + rawRef.dataId['ccd'])
        visitCcdIdList = list(visitCcdIdList)
        
        # Map visits/CCDs out to separate nodes:
        pool.map(self.runVisit, visitCcdIdList, rawRefList)
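The << 4 packs the visit and CCD numbers into a single integer, which only works while ccd < 16; the inline comment asks for the width to come from configuration. A sketch of the corresponding pack/unpack pair with the width as a parameter (CCD_BITS is a hypothetical stand-in for that config option):

    CCD_BITS = 4  # hypothetical config parameter replacing the hard-coded 4

    def packVisitCcd(visit, ccd, ccdBits=CCD_BITS):
        assert 0 <= ccd < (1 << ccdBits), "ccd number does not fit in the allotted bits"
        return (visit << ccdBits) | ccd

    def unpackVisitCcd(packed, ccdBits=CCD_BITS):
        return packed >> ccdBits, packed & ((1 << ccdBits) - 1)

    assert unpackVisitCcd(packVisitCcd(1234, 7)) == (1234, 7)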
Example #8
    def runDataRef(self, index):
        self.log.info('beginning for group %d' % index)
        # Prepare the storeSet
        pool = Pool("cgcSimBasicBatch")
        pool.cacheClear()
        # expDir = "galaxy_basic_psf60"
        # expDir = "small0_psf60"
        # expDir = "galaxy_basic2Center_psf60"
        expDir = "galaxy_basic2Shift_psf60"
        if not os.path.isdir(expDir):
            os.mkdir(expDir)
        pool.storeSet(expDir=expDir)
        fieldList = np.arange(200 * index, 200 * (index + 1))
        pool.map(self.process, fieldList)
        return
Example #9
    def run(self, expRefList, butler, calibId):
        """!Construct a calib from a list of exposure references

        This is the entry point, called by the TaskRunner.__call__

        Only the master node executes this method.

        @param expRefList  List of data references at the exposure level
        @param butler      Data butler
        @param calibId   Identifier dict for calib
        """
        for expRef in expRefList:
            self.addMissingKeys(expRef.dataId, butler, self.config.ccdKeys,
                                'raw')

        outputId = self.getOutputId(expRefList, calibId)
        ccdIdLists = getCcdIdListFromExposures(expRefList,
                                               level="sensor",
                                               ccdKeys=self.config.ccdKeys)

        # Ensure we can generate filenames for each output
        outputIdItemList = list(outputId.items())
        for ccdName in ccdIdLists:
            dataId = dict([(k, ccdName[i])
                           for i, k in enumerate(self.config.ccdKeys)])
            dataId.update(outputIdItemList)
            self.addMissingKeys(dataId, butler)
            dataId.update(outputIdItemList)  # re-apply the output keys in case addMissingKeys changed them

            try:
                butler.get(self.calibName + "_filename", dataId)
            except Exception as e:
                raise RuntimeError(
                    "Unable to determine output filename \"%s_filename\" from %s: %s"
                    % (self.calibName, dataId, e))

        pool = Pool()
        pool.storeSet(butler=butler)

        # Scatter: process CCDs independently
        data = self.scatterProcess(pool, ccdIdLists)

        # Gather: determine scalings
        scales = self.scale(ccdIdLists, data)

        # Scatter: combine
        self.scatterCombine(pool, outputId, ccdIdLists, scales)
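The per-CCD dataId built in the loop above pairs each element of ccdName with the corresponding key in config.ccdKeys; dict(zip(...)) expresses the same construction more directly:

    ccdKeys = ["ccd"]  # e.g. self.config.ccdKeys
    ccdName = (42,)    # one key tuple from ccdIdLists
    assert dict(zip(ccdKeys, ccdName)) \
        == dict((k, ccdName[i]) for i, k in enumerate(ccdKeys)) \
        == {"ccd": 42}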
Example #10
    def runDataRef(self, Id):
        self.log.info('beginning for group %d' % Id)
        # Prepare the storeSet
        pool = Pool("galsimProcessBatch")
        pool.cacheClear()
        expDir = "sim20210301/galaxy_basic_psf75"
        assert os.path.isdir(expDir)
        pool.storeSet(expDir=expDir)
        pool.storeSet(Id=Id)

        # Prepare the pool
        p2List = ['0000', '1111', '2222']
        p1List = ['g1', 'g2']
        pendList = ['%s-%s' % (i1, i2) for i1 in p1List for i2 in p2List]
        pool.map(self.process, pendList)
        self.log.info('finished group %d' % Id)
        return
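The nested comprehension that builds pendList is a cartesian product of p1List and p2List; itertools.product yields the same pairs in the same order:

    from itertools import product

    p1List = ['g1', 'g2']
    p2List = ['0000', '1111', '2222']
    pendList = ['%s-%s' % (i1, i2) for i1, i2 in product(p1List, p2List)]
    assert pendList == ['%s-%s' % (i1, i2) for i1 in p1List for i2 in p2List]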
Example #11
    def runDataRef(self, patchRefList):
        """!Run multiband processing on coadds

        Only the master node runs this method.

        No real MPI communication (scatter/gather) takes place: all I/O goes
        through the disk. We want the intermediate stages on disk, and the
        component Tasks are implemented around this, so we just follow suit.

        @param patchRefList:  Data references to run measurement
        """
        for patchRef in patchRefList:
            if patchRef:
                butler = patchRef.getButler()
                break
        else:
            raise RuntimeError("No valid patches")
        pool = Pool("all")
        pool.cacheClear()
        pool.storeSet(butler=butler)

        # Group by patch
        patches = {}
        tract = None
        for patchRef in patchRefList:
            dataId = patchRef.dataId
            if tract is None:
                tract = dataId["tract"]
            else:
                assert tract == dataId["tract"]

            patch = dataId["patch"]
            if patch not in patches:
                patches[patch] = []
            patches[patch].append(dataId)

        dataRefLists = [
            [getDataRef(butler, dataId,
                        self.config.coaddName + "Coadd_calexp")
             for dataId in dataIds]
            for dataIds in patches.values()
        ]
        pool.map(self.runAssociation, dataRefLists)
Example #12
    def runDataRef(self, pend):
        self.log.info('beginning for setup %s' % pend)
        # Prepare the storeSet
        pool = Pool("cgcSimCosmoBatch")
        pool.cacheClear()
        expDir = "galaxy_cosmoR_psf60"
        if not os.path.isdir(expDir):
            os.mkdir(expDir)
        pool.storeSet(expDir=expDir)
        pool.storeSet(pend=pend)

        hpList = imgSimutil.cosmoHSThpix[4:5]
        # TODO: remove this temporary override of hpList
        p2List = ['0000', '2222', '2000', '0200', '0020', '0002']
        p1List = ['g1']  # ['g1', 'g2']
        hpList = ['%s-%s' % (i1, i2) for i1 in p1List for i2 in p2List]
        pool.map(self.process, hpList)
        self.log.info('finished setup %s' % pend)
        return
Example #13
    def runTract(self, patchRefList, butler, selectDataList=[]):
        """Run stacking on a tract
        This method only runs on the master node.
        @param patchRefList: List of patch data references for tract
        @param butler: Data butler
        @param selectDataList: List of SelectStruct for inputs
        """
        pool = Pool("stacker")
        pool.cacheClear()
        pool.storeSet(butler=butler, warpType=self.config.coaddName + "Coadd_tempExp",
                      coaddType=self.config.coaddName + "Coadd")
        patchIdList = [patchRef.dataId for patchRef in patchRefList]

        selectedData = pool.map(self.warp, patchIdList, selectDataList)
        if self.config.doBackgroundReference:
            self.backgroundReference.run(patchRefList, selectDataList)

        def refNamer(patchRef):
            return tuple(map(int, patchRef.dataId["patch"].split(",")))
        lookup = dict(zip(map(refNamer, patchRefList), selectedData))
        coaddData = [Struct(patchId=patchRef.dataId, selectDataList=lookup[refNamer(patchRef)]) for
                     patchRef in patchRefList]
        pool.map(self.coadd, coaddData)
Example #14
    def run(self, expRef):
        """Measure focus for exposure

        This method is the top-level for running the focus measurement
        as a stand-alone BatchPoolTask.

        Only the master node runs this method.
        """
        pool = Pool("processFocus")
        pool.cacheClear()
        pool.storeSet(butler=expRef.getButler())

        dataIdList = sorted([ccdRef.dataId for ccdRef in expRef.subItems("ccd") if
                             ccdRef.datasetExists("raw") and self.isFocus(ccdRef)])

        results = pool.map(self.processPool, dataIdList)

        camera = expRef.get("camera")
        plotFilename = expRef.get("focusPlot_filename")
        focus = self.measureFocus(results, camera, plotFilename)
        self.log.info("Focus result for %s: %s" % (expRef.dataId, focus))
        return focus
Example #15
    def runDataRef(self, index):
        # Prepare the pool
        pool = Pool("processBasic")
        pool.cacheClear()
        pool.storeSet(doHSM=self.config.doHSM)
        pool.storeSet(doFPFS=self.config.doFPFS)
        pool.storeSet(galDir=self.config.galDir)
        pool.storeSet(outDir=self.config.outDir)
        fieldList = np.arange(100 * index, 100 * (index + 1))
        pool.map(self.process, fieldList)
        return
Example #16
    def runDataRef(self, pend):
        psfFWHM = '60'  # '60', 'HSC'
        npend = 'outCosmoR-var36em4'
        outDir = os.path.join(self.config.rootDir, npend,
                              'mag245-res03-bm38-dis4')
        if not os.path.isdir(outDir):
            os.mkdir(outDir)
        self.log.info('beginning for %s, seeing %s: ' % (pend, psfFWHM))
        # Prepare the storeSet
        pool = Pool("reGaussCosmoMeasBatch")
        pool.cacheClear()
        pool.storeSet(pend=pend)
        pool.storeSet(psfFWHM=psfFWHM)
        pool.storeSet(npend=npend)
        # Prepare the pool
        resList = pool.map(self.process, np.arange(1000))
        resList = [x for x in resList if x is not None]
        if len(resList) > 1:
            newTab = Table(rows=resList,
                           names=('e1_z1', 'n_z1', 'e1_z2', 'n_z2',
                                  'e1_z3', 'n_z3', 'e1_z4', 'n_z4'))
            finOname = os.path.join(outDir,
                                    'e1_%s_psf%s.fits' % (pend, psfFWHM))
            newTab.write(finOname, overwrite=True)
        return
Example #17
    def runDataRef(self, expRef):
        """Perform sky correction on an exposure

        We restore the original sky, and remove it again using multiple
        algorithms. We optionally apply:

        1. A large-scale background model.
            This step removes very-large-scale sky such as moonlight.
        2. A sky frame.
        3. A medium-scale background model.
            This step removes residual sky that is smooth across the focal plane.

        Only the master node executes this method. The data is held on
        the slave nodes, which do all the hard work.

        Parameters
        ----------
        expRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for exposure.

        See Also
        --------
        ~lsst.pipe.drivers.SkyCorrectionTask.run
        """
        if DEBUG:
            extension = "-%(visit)d.fits" % expRef.dataId

        with self.logOperation("processing %s" % (expRef.dataId,)):
            pool = Pool()
            pool.cacheClear()
            pool.storeSet(butler=expRef.getButler())
            camera = expRef.get("camera")

            dataIdList = [ccdRef.dataId for ccdRef in expRef.subItems("ccd") if
                          ccdRef.datasetExists(self.config.calexpType)]

            exposures = pool.map(self.loadImage, dataIdList)
            if DEBUG:
                makeCameraImage(camera, exposures, "restored" + extension)
                exposures = pool.mapToPrevious(self.collectOriginal, dataIdList)
                makeCameraImage(camera, exposures, "original" + extension)
                exposures = pool.mapToPrevious(self.collectMask, dataIdList)
                makeCameraImage(camera, exposures, "mask" + extension)

            if self.config.doBgModel:
                exposures = self.focalPlaneBackground(camera, pool, dataIdList, self.config.bgModel)

            if self.config.doSky:
                measScales = pool.mapToPrevious(self.measureSkyFrame, dataIdList)
                scale = self.sky.solveScales(measScales)
                self.log.info("Sky frame scale: %s" % (scale,))

                exposures = pool.mapToPrevious(self.subtractSkyFrame, dataIdList, scale)
                if DEBUG:
                    makeCameraImage(camera, exposures, "skysub" + extension)
                    calibs = pool.mapToPrevious(self.collectSky, dataIdList)
                    makeCameraImage(camera, calibs, "sky" + extension)

            if self.config.doBgModel2:
                exposures = self.focalPlaneBackground(camera, pool, dataIdList, self.config.bgModel2)

            # Persist camera-level image of calexp
            image = makeCameraImage(camera, exposures)
            expRef.put(image, "calexp_camera")

            pool.mapToPrevious(self.write, dataIdList)
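The chain above relies on mapToPrevious revisiting each data item on the node that handled it in the initial map, so later stages (sky measurement, subtraction, writing) can reuse the exposure that loadImage left in the per-item cache. A serial mock of that assumed contract, for illustration only (not the real lsst.ctrl.pool implementation):

    from types import SimpleNamespace

    class SerialPool:
        """Single-process stand-in for Pool, illustrating the caching contract."""

        def __init__(self):
            self._caches = {}

        def map(self, func, dataList, *args):
            # Fresh scatter: give every item its own empty cache.
            self._caches = {i: SimpleNamespace() for i in range(len(dataList))}
            return [func(self._caches[i], data, *args)
                    for i, data in enumerate(dataList)]

        def mapToPrevious(self, func, dataList, *args):
            # Revisit the same items with the caches the previous map left behind.
            return [func(self._caches[i], data, *args)
                    for i, data in enumerate(dataList)]

Under this model, loadImage would store the exposure on its cache argument, and subtractSkyFrame or write would find it there on a later pass.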
Example #18
    def run(self, expRef):
        """Perform sky correction on an exposure

        We restore the original sky, and remove it again using multiple
        algorithms. We optionally apply:

        1. A large-scale background model.
        2. A sky frame.

        Only the master node executes this method. The data is held on
        the slave nodes, which do all the hard work.

        Parameters
        ----------
        expRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for exposure.
        """
        if DEBUG:
            extension = "-%(visit)d.fits" % expRef.dataId

        with self.logOperation("processing %s" % (expRef.dataId, )):
            pool = Pool()
            pool.cacheClear()
            pool.storeSet(butler=expRef.getButler())
            camera = expRef.get("camera")

            dataIdList = [
                ccdRef.dataId for ccdRef in expRef.subItems("ccd")
                if ccdRef.datasetExists("calexp")
            ]

            exposures = pool.map(self.loadImage, dataIdList)
            if DEBUG:
                makeCameraImage(camera, exposures, "restored" + extension)
                exposures = pool.mapToPrevious(self.collectOriginal,
                                               dataIdList)
                makeCameraImage(camera, exposures, "original" + extension)
                exposures = pool.mapToPrevious(self.collectMask, dataIdList)
                makeCameraImage(camera, exposures, "mask" + extension)

            if self.config.doBgModel:
                bgModel = FocalPlaneBackground.fromCamera(
                    self.config.bgModel, camera)
                data = [
                    Struct(dataId=dataId, bgModel=bgModel.clone())
                    for dataId in dataIdList
                ]
                bgModelList = pool.mapToPrevious(self.accumulateModel, data)
                for ii, bg in enumerate(bgModelList):
                    self.log.info("Background %d: %d pixels", ii,
                                  bg._numbers.getArray().sum())
                    bgModel.merge(bg)

                if DEBUG:
                    bgModel.getStatsImage().writeFits("bgModel" + extension)
                    bgImages = pool.mapToPrevious(self.realiseModel,
                                                  dataIdList, bgModel)
                    makeCameraImage(camera, bgImages,
                                    "bgModelCamera" + extension)

                exposures = pool.mapToPrevious(self.subtractModel, dataIdList,
                                               bgModel)
                if DEBUG:
                    makeCameraImage(camera, exposures, "modelsub" + extension)

            if self.config.doSky:
                measScales = pool.mapToPrevious(self.measureSkyFrame,
                                                dataIdList)
                scale = self.sky.solveScales(measScales)
                self.log.info("Sky frame scale: %s" % (scale, ))
                exposures = pool.mapToPrevious(self.subtractSkyFrame,
                                               dataIdList, scale)
                if DEBUG:
                    makeCameraImage(camera, exposures, "skysub" + extension)
                    calibs = pool.mapToPrevious(self.collectSky, dataIdList)
                    makeCameraImage(camera, calibs, "sky" + extension)

            # Persist camera-level image of calexp
            image = makeCameraImage(camera, exposures)
            expRef.put(image, "calexp_camera")

            pool.mapToPrevious(self.write, dataIdList)
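The doBgModel branch is a scatter/gather reduction: each node folds its CCDs into a clone of the empty FocalPlaneBackground, and the master merges the partial models. Stripped of the camera geometry, the same pattern with plain arrays (a toy illustration, not the real model type):

    import numpy as np

    template = np.zeros(4)  # analogue of the empty bgModel
    perNodeData = [np.array([1., 0., 0., 0.]),
                   np.array([0., 2., 0., 0.]),
                   np.array([0., 0., 3., 0.])]

    # Scatter: each node accumulates into its own clone of the template ...
    partials = [template.copy() + data for data in perNodeData]

    # ... gather: the master merges the partial models into one.
    merged = template.copy()
    for partial in partials:
        merged += partial  # analogue of bgModel.merge(bg)

    assert (merged == np.array([1., 2., 3., 0.])).all()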
Example #19
    def runDataRef(self, patchRefList):
        """!Run multiband processing on coadds

        Only the master node runs this method.

        No real MPI communication (scatter/gather) takes place: all I/O goes
        through the disk. We want the intermediate stages on disk, and the
        component Tasks are implemented around this, so we just follow suit.

        @param patchRefList:  Data references to run measurement
        """
        for patchRef in patchRefList:
            if patchRef:
                butler = patchRef.getButler()
                break
        else:
            raise RuntimeError("No valid patches")
        pool = Pool("all")
        pool.cacheClear()
        pool.storeSet(butler=butler)
        # MultiBand measurements require that the detection stage be completed
        # before measurements can be made.
        #
        # The configuration for coaddDriver.py allows detection to be turned
        # off in the event that fake objects are to be added during the
        # detection process.  This allows the long co-addition process to be
        # run once, and multiple different MultiBand reruns (with different
        # fake objects) to exist from the same base co-addition.
        #
        # However, we only re-run detection if doDetection is explicitly True
        # here (this should always be the opposite of coaddDriver.doDetection);
        # otherwise we have no way to tell reliably whether any detections
        # present in an input repo are safe to use.
        if self.config.doDetection:
            detectionList = []
            for patchRef in patchRefList:
                if ("detectCoaddSources" in self.reuse
                        and patchRef.datasetExists(
                            self.coaddType + "Coadd_calexp", write=True)):
                    self.log.info(
                        "Skipping detectCoaddSources for %s; output already exists."
                        % patchRef.dataId)
                    continue
                if not patchRef.datasetExists(self.coaddType + "Coadd"):
                    self.log.debug(
                        "Not processing %s; required input %sCoadd missing." %
                        (patchRef.dataId, self.config.coaddName))
                    continue
                detectionList.append(patchRef)

            pool.map(self.runDetection, detectionList)

        patchRefList = [
            patchRef for patchRef in patchRefList
            if patchRef.datasetExists(self.coaddType + "Coadd_calexp")
            and patchRef.datasetExists(self.config.coaddName + "Coadd_det",
                                       write=self.config.doDetection)
        ]
        dataIdList = [patchRef.dataId for patchRef in patchRefList]

        # Group by patch
        patches = {}
        tract = None
        for patchRef in patchRefList:
            dataId = patchRef.dataId
            if tract is None:
                tract = dataId["tract"]
            else:
                assert tract == dataId["tract"]

            patch = dataId["patch"]
            if patch not in patches:
                patches[patch] = []
            patches[patch].append(dataId)

        pool.map(self.runMergeDetections, patches.values())

        # Deblend merged detections, and test for reprocessing
        #
        # The reprocessing allows us to have multiple attempts at deblending large footprints. Large
        # footprints can suck up a lot of memory in the deblender, which means that when we process on a
        # cluster, we want to refuse to deblend them (they're flagged "deblend.parent-too-big"). But since
        # they may have astronomically interesting data, we want the ability to go back and reprocess them
        # with a more permissive configuration when we have more memory or processing time.
        #
        # self.runDeblendMerged will return whether there are any footprints in that image that required
        # reprocessing.  We need to convert that list of booleans into a dict mapping the patchId (x,y) to
        # a boolean. That tells us whether the merge measurement and forced photometry need to be re-run on
        # a particular patch.
        #
        # This determination of which patches need to be reprocessed exists only in memory (the measurements
        # have been written, clobbering the old ones), so if there was an exception we would lose this
        # information, leaving things in an inconsistent state (measurements new, but merged measurements and
        # forced photometry old). To attempt to preserve this status, we touch a file (dataset named
        # "deepCoadd_multibandReprocessing") --- if this file exists, we need to re-run the measurements,
        # merge and forced photometry.
        #
        # This is, hopefully, a temporary workaround until we can improve the
        # deblender.
        try:
            reprocessed = pool.map(self.runDeblendMerged, patches.values())
        finally:
            if self.config.reprocessing:
                patchReprocessing = {}
                for dataId, reprocess in zip(dataIdList, reprocessed):
                    patchId = dataId["patch"]
                    patchReprocessing[patchId] = patchReprocessing.get(
                        patchId, False) or reprocess
                # Persist the determination, to make error recovery easier
                reprocessDataset = self.config.coaddName + "Coadd_multibandReprocessing"
                for patchId in patchReprocessing:
                    if not patchReprocessing[patchId]:
                        continue
                    dataId = dict(tract=tract, patch=patchId)
                    if patchReprocessing[patchId]:
                        filename = butler.get(reprocessDataset + "_filename",
                                              dataId)[0]
                        open(filename, 'a').close()  # Touch file
                    elif butler.datasetExists(reprocessDataset, dataId):
                        # We must have failed at some point while reprocessing
                        # and we're starting over
                        patchReprocessing[patchId] = True

        # Only process patches that have been identified as needing it
        pool.map(self.runMeasurements, [
            dataId1 for dataId1 in dataIdList if not self.config.reprocessing
            or patchReprocessing[dataId1["patch"]]
        ])
        pool.map(self.runMergeMeasurements, [
            idList for patchId, idList in patches.items()
            if not self.config.reprocessing or patchReprocessing[patchId]
        ])
        pool.map(self.runForcedPhot, [
            dataId1 for dataId1 in dataIdList if not self.config.reprocessing
            or patchReprocessing[dataId1["patch"]]
        ])

        # Remove persisted reprocessing determination
        if self.config.reprocessing:
            for patchId in patchReprocessing:
                if not patchReprocessing[patchId]:
                    continue
                dataId = dict(tract=tract, patch=patchId)
                filename = butler.get(reprocessDataset + "_filename",
                                      dataId)[0]
                os.unlink(filename)

    def run(self, patchRefList):
        """Run multiband processing on coadds

        All nodes execute this method, though the master and slaves
        take different routes through it.

        No real MPI communication takes place: all I/O goes through the disk.
        We want the intermediate stages on disk, and the component Tasks are
        implemented around this, so we just follow suit.
        @param patchRefList:  Data references to run measurement
        """
        for patchRef in patchRefList:
            if patchRef:
                butler = patchRef.getButler()
                break
        else:
            raise RuntimeError("No valid patches")
        pool = Pool("all")
        pool.cacheClear()
        pool.storeSet(butler=butler)

        patchRefList = [patchRef for patchRef in patchRefList if
                        patchRef.datasetExists(self.config.coaddName + "Coadd") and
                        patchRef.datasetExists(self.config.coaddName + "Coadd_det")]
        dataIdList = [patchRef.dataId for patchRef in patchRefList]

        # Group by patch
        patches = {}
        tract = None
        for patchRef in patchRefList:
            dataId = patchRef.dataId
            if tract is None:
                tract = dataId["tract"]
            else:
                assert tract == dataId["tract"]

            patch = dataId["patch"]
            if patch not in patches:
                patches[patch] = []
            patches[patch].append(dataId)

        pool.map(self.runMergeDetections, patches.values())

        # Measure merged detections, and test for reprocessing
        #
        # The reprocessing allows us to have multiple attempts at deblending large footprints. Large
        # footprints can suck up a lot of memory in the deblender, which means that when we process on a
        # cluster, we want to refuse to deblend them (they're flagged "deblend.parent-too-big"). But since
        # they may have astronomically interesting data, we want the ability to go back and reprocess them
        # with a more permissive configuration when we have more memory or processing time.
        #
        # self.runMeasureMerged will return whether there are any footprints in that image that required
        # reprocessing.  We need to convert that list of booleans into a dict mapping the patchId (x,y) to
        # a boolean. That tells us whether the merge measurement and forced photometry need to be re-run on
        # a particular patch.
        #
        # This determination of which patches need to be reprocessed exists only in memory (the measurements
        # have been written, clobbering the old ones), so if there was an exception we would lose this
        # information, leaving things in an inconsistent state (measurements new, but merged measurements and
        # forced photometry old). To attempt to preserve this status, we touch a file (dataset named
        # "deepCoadd_multibandReprocessing") --- if this file exists, we need to re-run the merge and
        # forced photometry.
        #
        # This is, hopefully, a temporary workaround until we can improve the deblender.
        try:
            reprocessed = pool.map(self.runMeasureMerged, dataIdList)
        finally:
            if self.config.reprocessing:
                patchReprocessing = {}
                for dataId, reprocess in zip(dataIdList, reprocessed):
                    patchId = dataId["patch"]
                    patchReprocessing[patchId] = patchReprocessing.get(patchId, False) or reprocess
                # Persist the determination, to make error recovery easier
                reprocessDataset = self.config.coaddName + "Coadd_multibandReprocessing"
                for patchId in patchReprocessing:
                    if not patchReprocessing[patchId]:
                        continue
                    dataId = dict(tract=tract, patch=patchId)
                    if patchReprocessing[patchId]:
                        filename = butler.get(reprocessDataset + "_filename", dataId)[0]
                        open(filename, 'a').close() # Touch file
                    elif butler.datasetExists(reprocessDataset, dataId):
                        # We must have failed at some point while reprocessing and we're starting over
                        patchReprocessing[patchId] = True

        # Only process patches that have been identified as needing it
        pool.map(self.runMergeMeasurements, [idList for patchId, idList in patches.items() if
                                             not self.config.reprocessing or patchReprocessing[patchId]])
        pool.map(self.runForcedPhot, [dataId for dataId in dataIdList if not self.config.reprocessing or
                                      patchReprocessing[dataId["patch"]]])

        # Remove persisted reprocessing determination
        if self.config.reprocessing:
            for patchId in patchReprocessing:
                if not patchReprocessing[patchId]:
                    continue
                dataId = dict(tract=tract, patch=patchId)
                filename = butler.get(reprocessDataset + "_filename", dataId)[0]
                os.unlink(filename)