def testQuantum(self):
     registry = self.makeRegistry()
     if not registry.limited:
         registry.addDimensionEntry("instrument",
                                    {"instrument": "DummyCam"})
     run = registry.makeRun(collection="test")
     storageClass = StorageClass("testQuantum")
     registry.storageClasses.registerStorageClass(storageClass)
     # Make two predicted inputs
     datasetType1 = DatasetType(name="dst1",
                                dimensions=registry.dimensions.extract(
                                    ("instrument", )),
                                storageClass=storageClass)
     registry.registerDatasetType(datasetType1)
     ref1 = registry.addDataset(datasetType1,
                                dataId={"instrument": "DummyCam"},
                                run=run)
     datasetType2 = DatasetType(name="dst2",
                                dimensions=registry.dimensions.extract(
                                    ("instrument", )),
                                storageClass=storageClass)
     registry.registerDatasetType(datasetType2)
     ref2 = registry.addDataset(datasetType2,
                                dataId={"instrument": "DummyCam"},
                                run=run)
     # Create and add a Quantum
     quantum = Quantum(run=run,
                       task="some.fully.qualified.SuperTask",
                       startTime=datetime(2018, 1, 1),
                       endTime=datetime(2018, 1, 2),
                       host="localhost")
     quantum.addPredictedInput(ref1)
     quantum.addPredictedInput(ref2)
     # Quantum is not yet in Registry, so can't mark input as actual
     with self.assertRaises(KeyError):
         registry.markInputUsed(quantum, ref1)
     registry.addQuantum(quantum)
     # Now we can
     registry.markInputUsed(quantum, ref1)
     outQuantum = registry.getQuantum(quantum.id)
     self.assertEqual(outQuantum, quantum)
     # Removing a predictedInput dataset should be enough to remove the
     # Quantum; we don't want to allow Quantums with inaccurate information
     # to exist.
     registry.removeDataset(ref1)
     self.assertIsNone(registry.getQuantum(quantum.id))
    def _makeQuanta(self, config):
        """Create set of Quanta
        """
        run = Run(collection=1, environment=None, pipeline=None)

        descriptor = pipeBase.DatasetTypeDescriptor.fromConfig(config.input)
        dstype0 = descriptor.datasetType
        descriptor = pipeBase.DatasetTypeDescriptor.fromConfig(config.output)
        dstype1 = descriptor.datasetType

        quanta = []
        for visit in range(100):
            quantum = Quantum(run=run, task=None)
            quantum.addPredictedInput(self._makeDSRefVisit(dstype0, visit))
            quantum.addOutput(self._makeDSRefVisit(dstype1, visit))
            quanta.append(quantum)

        return quanta
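
Note: the _makeDSRefVisit helper called by these _makeQuanta examples is not shown anywhere on this page. The sketch below is a hedged reconstruction of what such a helper plausibly does, using the DatasetRef/DataCoordinate API from lsst.daf.butler that appears in the other examples; the instrument value and the exact signature are assumptions, not the original code.

    def _makeDSRefVisit(self, dstype, visitId, universe=None):
        # Hypothetical reconstruction: build an unresolved DatasetRef for
        # ``dstype`` with a per-visit data ID.  "DummyCam" is a placeholder
        # instrument name.
        dataId = dict(instrument="DummyCam", visit=visitId)
        if universe is not None:
            dataId = DataCoordinate.standardize(dataId, universe=universe)
        return DatasetRef(dstype, dataId)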
Example #3
    def addQuantum(self, quantum: Quantum):
        config = self.taskDef.config
        connectionClass = config.connections.ConnectionsClass
        connectionInstance = connectionClass(config=config)
        # This will raise if one of the check conditions is not met, which is the intended
        # behavior
        result = connectionInstance.adjustQuantum(quantum.predictedInputs)
        quantum._predictedInputs = NamedKeyDict(result)

        # If this function has reached this far, add the quantum
        self.quanta.append(quantum)
Example #4
    def testRunQuantum(self):
        inputId = {
            "instrument": self.CAMERA_ID,
            "visit": self.VISIT_ID,
            "detector": self.CHIP_ID,
        }

        butler = self._makeButler()
        # self.task.config not persistable because it refers to a local class
        # We don't actually use the persisted config, so just make a new one
        butler.put(self.task.ConfigClass(), "apdb_marker", inputId)

        quantum = Quantum(taskClass=self.taskClass)
        quantum.addPredictedInput(
            ref_from_connection(butler, self.connections.dbInfo, inputId))
        quantum.addOutput(
            ref_from_connection(butler, self.connections.measurement, {
                "instrument": self.CAMERA_ID,
            }))

        run_quantum(self.task, butler, quantum)

        # Did output data ID get passed to DummyTask.run?
        measurement = butler.get(self.connections.measurement.name,
                                 instrument=self.CAMERA_ID)
        self.assertEqual(measurement.quantity,
                         len(self.CAMERA_ID) * u.dimensionless_unscaled)
Example #5
    def testConstructor(self):
        """Test of constructor.
        """
        # Quantum specific arguments
        taskName = "some.task.object"  # can't use a real PipelineTask due to inverted package dependency

        quantum = Quantum(taskName=taskName)
        self.assertEqual(quantum.taskName, taskName)
        self.assertEqual(quantum.initInputs, {})
        self.assertEqual(quantum.inputs, NamedKeyDict())
        self.assertEqual(quantum.outputs, {})
        self.assertIsNone(quantum.dataId)

        universe = DimensionUniverse()
        instrument = "DummyCam"
        datasetTypeName = "test_ds"
        storageClass = StorageClass("testref_StructuredData")
        datasetType = DatasetType(datasetTypeName,
                                  universe.extract(("instrument", "visit")),
                                  storageClass)
        predictedInputs = {
            datasetType: [
                DatasetRef(datasetType, dict(instrument=instrument, visit=42)),
                DatasetRef(datasetType, dict(instrument=instrument, visit=43))
            ]
        }
        outputs = {
            datasetType: [
                DatasetRef(datasetType, dict(instrument=instrument, visit=42)),
                DatasetRef(datasetType, dict(instrument=instrument, visit=43))
            ]
        }

        quantum = Quantum(taskName=taskName,
                          inputs=predictedInputs,
                          outputs=outputs)
        self.assertEqual(len(quantum.inputs[datasetType]), 2)
        self.assertEqual(len(quantum.outputs[datasetType]), 2)
Example #6
def makeQuantum(task, butler, dataId, ioDataIds):
    """Create a Quantum for a particular data ID(s).

    Parameters
    ----------
    task : `lsst.pipe.base.PipelineTask`
        The task whose processing the quantum represents.
    butler : `lsst.daf.butler.Butler`
        The collection the quantum refers to.
    dataId : any data ID type
        The data ID of the quantum. Must have the same dimensions as
        ``task``'s connections class.
    ioDataIds : `collections.abc.Mapping` [`str`]
        A mapping keyed by input/output names. Values must be data IDs for
        single connections and sequences of data IDs for multiple connections.

    Returns
    -------
    quantum : `lsst.daf.butler.Quantum`
        A quantum for ``task``, when called with ``dataIds``.
    """
    connections = task.config.ConnectionsClass(config=task.config)

    try:
        inputs = defaultdict(list)
        outputs = defaultdict(list)
        for name in itertools.chain(connections.inputs,
                                    connections.prerequisiteInputs):
            connection = connections.__getattribute__(name)
            _checkDataIdMultiplicity(name, ioDataIds[name],
                                     connection.multiple)
            ids = _normalizeDataIds(ioDataIds[name])
            for id in ids:
                ref = _refFromConnection(butler, connection, id)
                inputs[ref.datasetType].append(ref)
        for name in connections.outputs:
            connection = connections.__getattribute__(name)
            _checkDataIdMultiplicity(name, ioDataIds[name],
                                     connection.multiple)
            ids = _normalizeDataIds(ioDataIds[name])
            for id in ids:
                ref = _refFromConnection(butler, connection, id)
                outputs[ref.datasetType].append(ref)
        quantum = Quantum(taskClass=type(task),
                          dataId=dataId,
                          inputs=inputs,
                          outputs=outputs)
        return quantum
    except KeyError as e:
        raise ValueError("Mismatch in input data.") from e
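
For orientation, a hedged usage sketch of the makeQuantum helper above follows. The task and butler objects, and the connection names (calexp, refcat, apdbMarker), are hypothetical; the point is that single connections take one data ID while connections declared with multiple=True take a sequence.

# Hypothetical usage of makeQuantum(); `task` and `butler` are assumed to
# already exist, and the connection names are invented for illustration.
dataId = {"instrument": "DummyCam", "visit": 42, "detector": 0}
ioDataIds = {
    "calexp": dataId,                                # single connection
    "refcat": [{"htm7": 12345}, {"htm7": 12346}],    # multiple=True connection
    "apdbMarker": dataId,                            # output connection
}
quantum = makeQuantum(task, butler, dataId, ioDataIds)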
Example #7
 def from_simple(
     cls,
     simple: SerializedQuantumNode,
     taskDefMap: Dict[str, TaskDef],
     universe: DimensionUniverse,
     recontitutedDimensions: Optional[Dict[int,
                                           Tuple[str,
                                                 DimensionRecord]]] = None,
 ) -> QuantumNode:
     return QuantumNode(
         quantum=Quantum.from_simple(
             simple.quantum,
             universe,
             reconstitutedDimensions=recontitutedDimensions),
         taskDef=taskDefMap[simple.taskLabel],
         nodeId=simple.nodeId,
     )
Example #8
    def _makeQuanta(self, config):
        """Create set of Quanta"""
        universe = DimensionUniverse()
        connections = config.connections.ConnectionsClass(config=config)

        dstype0 = connections.input.makeDatasetType(universe)
        dstype1 = connections.output.makeDatasetType(universe)

        quanta = []
        for visit in range(100):
            inputRef = self._makeDSRefVisit(dstype0, visit, universe)
            outputRef = self._makeDSRefVisit(dstype1, visit, universe)
            quantum = Quantum(
                inputs={inputRef.datasetType: [inputRef]}, outputs={outputRef.datasetType: [outputRef]}
            )
            quanta.append(quantum)

        return quanta
    def updatedQuantumInputs(self, quantum, butler):
        """Update quantum with extra information, returns a new updated Quantum.

        Some methods may require input DatasetRefs to have a non-None
        ``dataset_id``, but for intermediate datasets it may not be filled
        in during QuantumGraph construction. This method retrieves the
        missing information from the registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance
        """
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    resolvedRef = butler.registry.findDataset(
                        ref.datasetType,
                        ref.dataId,
                        collections=butler.collections)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collections {butler.collections}.")
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
        return Quantum(taskName=quantum.taskName,
                       taskClass=quantum.taskClass,
                       dataId=quantum.dataId,
                       initInputs=quantum.initInputs,
                       inputs=updatedInputs,
                       outputs=quantum.outputs)
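
A hedged sketch of how a method like updatedQuantumInputs is typically driven: each quantum is refreshed immediately before execution so that intermediate inputs produced by earlier quanta carry resolved dataset IDs. executeQuantum below is a placeholder name, not an actual method of this class.

        # Hypothetical driver loop; ``executeQuantum`` is a placeholder name.
        for quantum in quanta:
            quantum = self.updatedQuantumInputs(quantum, butler)
            self.executeQuantum(taskDef, quantum, butler)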
Example #10
def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it; the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """

    # The task name in TaskDef needs to be a real importable name; use one that is sure to exist
    taskDef = TaskDef(taskName="lsst.pipe.base.Struct", config=SimpleConfig())
    quanta = [
        Quantum(taskName="lsst.pipe.base.Struct",
                inputs={FakeTaskDef("A"): FakeDSRef("A", (1, 2))})
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)})
    return qgraph
Example #11
 def testConstructor(self):
     """Test of constructor.
     """
     # Quantum specific arguments
     run = None  # TODO add Run
     taskName = "some.task.object"  # can't use a real PipelineTask due to inverted package dependency
     # Base class arguments
     startTime = datetime(2018, 1, 1)
     endTime = datetime(2018, 1, 2)
     host = "localhost"
     quantum = Quantum(taskName=taskName, run=run, startTime=startTime, endTime=endTime, host=host)
     self.assertEqual(quantum.taskName, taskName)
     self.assertEqual(quantum.run, run)
     self.assertEqual(quantum.predictedInputs, NamedKeyDict())
     self.assertEqual(quantum.actualInputs, NamedKeyDict())
     self.assertIsNone(quantum.dataId)
     self.assertIsNone(quantum.id)
     self.assertEqual(quantum.startTime, startTime)
     self.assertEqual(quantum.endTime, endTime)
     self.assertEqual(quantum.host, host)
Example #12
    def makeQuantum(self) -> Quantum:
        """Transform the scaffolding object into a true `Quantum` instance.

        Returns
        -------
        quantum : `Quantum`
            An actual `Quantum` instance.
        """
        allInputs = self.inputs.unpackMultiRefs()
        allInputs.update(self.prerequisites.unpackMultiRefs())
        # Give the task's Connections class an opportunity to remove some
        # inputs, or complain if they are unacceptable.
        # This will raise if one of the check conditions is not met, which is
        # the intended behavior
        allInputs = self.task.taskDef.connections.adjustQuantum(allInputs)
        return Quantum(
            taskName=self.task.taskDef.taskName,
            taskClass=self.task.taskDef.taskClass,
            dataId=self.dataId,
            initInputs=self.task.initInputs.unpackSingleRefs(),
            inputs=allInputs,
            outputs=self.outputs.unpackMultiRefs(),
        )
    def _makeQuanta(self, config):
        """Create set of Quanta
        """
        universe = DimensionUniverse.fromConfig()
        run = Run(collection=1, environment=None, pipeline=None)

        descriptor = pipeBase.DatasetTypeDescriptor.fromConfig(config.input)
        dstype0 = descriptor.makeDatasetType(universe)
        descriptor = pipeBase.DatasetTypeDescriptor.fromConfig(config.output)
        dstype1 = descriptor.makeDatasetType(universe)

        quanta = []
        for visit in range(100):
            quantum = Quantum(run=run, task=None)
            quantum.addPredictedInput(self._makeDSRefVisit(dstype0, visit))
            quantum.addOutput(self._makeDSRefVisit(dstype1, visit))
            quanta.append(quantum)

        return quanta
Example #14
    def _makeQuanta(self, config):
        """Create set of Quanta
        """
        universe = DimensionUniverse()
        run = Run(collection=1, environment=None, pipeline=None)
        connections = config.connections.ConnectionsClass(config=config)

        dstype0 = connections.input.makeDatasetType(universe)
        dstype1 = connections.output.makeDatasetType(universe)

        quanta = []
        for visit in range(100):
            quantum = Quantum(run=run)
            quantum.addPredictedInput(
                self._makeDSRefVisit(dstype0, visit, universe))
            quantum.addOutput(self._makeDSRefVisit(dstype1, visit, universe))
            quanta.append(quantum)

        return quanta
Example #15
    def testAddInputsOutputs(self):
        """Test of addPredictedInput() method.
        """
        quantum = Quantum(taskName="some.task.object", run=None)

        # start with empty
        self.assertEqual(quantum.predictedInputs, dict())
        universe = DimensionUniverse()
        instrument = "DummyCam"
        datasetTypeName = "test_ds"
        storageClass = StorageClass("testref_StructuredData")
        datasetType = DatasetType(datasetTypeName,
                                  universe.extract(("instrument", "visit")),
                                  storageClass)

        # add one ref
        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=42))
        quantum.addPredictedInput(ref)
        self.assertIn(datasetTypeName, quantum.predictedInputs)
        self.assertEqual(len(quantum.predictedInputs[datasetTypeName]), 1)
        # add second ref
        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=43))
        quantum.addPredictedInput(ref)
        self.assertEqual(len(quantum.predictedInputs[datasetTypeName]), 2)

        # mark last ref as actually used
        self.assertEqual(quantum.actualInputs, dict())
        quantum._markInputUsed(ref)
        self.assertIn(datasetTypeName, quantum.actualInputs)
        self.assertEqual(len(quantum.actualInputs[datasetTypeName]), 1)

        # add couple of outputs too
        self.assertEqual(quantum.outputs, dict())
        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=42))
        quantum.addOutput(ref)
        self.assertIn(datasetTypeName, quantum.outputs)
        self.assertEqual(len(quantum.outputs[datasetTypeName]), 1)

        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=43))
        quantum.addOutput(ref)
        self.assertEqual(len(quantum.outputs[datasetTypeName]), 2)
Example #16
 def setUp(self):
     config = Config(
         {
             "version": 1,
             "namespace": "pipe_base_test",
             "skypix": {
                 "common": "htm7",
                 "htm": {
                     "class": "lsst.sphgeom.HtmPixelization",
                     "max_level": 24,
                 },
             },
             "elements": {
                 "A": {
                     "keys": [
                         {
                             "name": "id",
                             "type": "int",
                         }
                     ],
                     "storage": {
                         "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                     },
                 },
                 "B": {
                     "keys": [
                         {
                             "name": "id",
                             "type": "int",
                         }
                     ],
                     "storage": {
                         "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                     },
                 },
             },
             "packers": {},
         }
     )
     universe = DimensionUniverse(config=config)
     # need to make a mapping of TaskDef to set of quantum
     quantumMap = {}
     tasks = []
     for task, label in (
         (Dummy1PipelineTask, "R"),
         (Dummy2PipelineTask, "S"),
         (Dummy3PipelineTask, "T"),
         (Dummy4PipelineTask, "U"),
     ):
         config = task.ConfigClass()
         taskDef = TaskDef(get_full_type_name(task), config, task, label)
         tasks.append(taskDef)
         quantumSet = set()
         connections = taskDef.connections
         for a, b in ((1, 2), (3, 4)):
             if connections.initInputs:
                 initInputDSType = DatasetType(
                     connections.initInput.name,
                     tuple(),
                     storageClass=connections.initInput.storageClass,
                     universe=universe,
                 )
                 initRefs = [DatasetRef(initInputDSType, DataCoordinate.makeEmpty(universe))]
             else:
                 initRefs = None
             inputDSType = DatasetType(
                 connections.input.name,
                 connections.input.dimensions,
                 storageClass=connections.input.storageClass,
                 universe=universe,
             )
             inputRefs = [
                 DatasetRef(inputDSType, DataCoordinate.standardize({"A": a, "B": b}, universe=universe))
             ]
             outputDSType = DatasetType(
                 connections.output.name,
                 connections.output.dimensions,
                 storageClass=connections.output.storageClass,
                 universe=universe,
             )
             outputRefs = [
                 DatasetRef(outputDSType, DataCoordinate.standardize({"A": a, "B": b}, universe=universe))
             ]
             quantumSet.add(
                 Quantum(
                     taskName=task.__qualname__,
                     dataId=DataCoordinate.standardize({"A": a, "B": b}, universe=universe),
                     taskClass=task,
                     initInputs=initRefs,
                     inputs={inputDSType: inputRefs},
                     outputs={outputDSType: outputRefs},
                 )
             )
         quantumMap[taskDef] = quantumSet
     self.tasks = tasks
     self.quantumMap = quantumMap
     self.qGraph = QuantumGraph(quantumMap, metadata=METADATA)
     self.universe = universe
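
To show how the graph assembled in this setUp might be exercised, here is an illustrative test method. The accessor names (findTaskDefByLabel, getQuantaForTask, len()) are assumed from the QuantumGraph API, and the expected counts simply follow from the four tasks with two quanta each built above.

    def testGraphContents(self):
        # Illustrative only: accessor names are assumed from the QuantumGraph
        # API, and the counts follow from setUp() (4 tasks x 2 quanta each).
        self.assertEqual(len(self.qGraph), 8)
        taskDef = self.qGraph.findTaskDefByLabel("R")
        self.assertEqual(len(self.qGraph.getQuantaForTask(taskDef)), 2)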
Example #17
    def fillQuanta(self, registry, inputCollections, *, skipExisting=True):
        """Define quanta for each task by splitting up the datasets associated
        with each task data ID.

        This method populates `_TaskScaffolding.quanta`.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry for the data repository; used for all data ID queries.
        inputCollections : `~collections.abc.Mapping`
            Mapping from dataset type name to an ordered sequence of
            collections to search for that dataset.  A `defaultdict` is
            recommended for the case where the same collections should be
            used for most datasets.
        skipExisting : `bool`, optional
            If `True` (default), a Quantum is not created if all its outputs
            already exist.
        """
        for task in self.tasks:
            for quantumDataId in task.dataIds:
                # Identify the (regular) inputs that correspond to the Quantum
                # with this data ID.  These are those whose data IDs have the
                # same values for all dimensions they have in common.
                # We do this with data IDs expanded to include implied
                # dimensions, which is why _DatasetScaffolding.dimensions is
                # expanded even though DatasetType.dimensions is not.
                inputs = NamedKeyDict()
                for datasetType, scaffolding in task.inputs.items():
                    inputs[datasetType] = [ref for ref, dataId in zip(scaffolding.refs, scaffolding.dataIds)
                                           if quantumDataId.matches(dataId)]
                # Same for outputs.
                outputs = NamedKeyDict()
                allOutputsPresent = True
                for datasetType, scaffolding in task.outputs.items():
                    outputs[datasetType] = []
                    for ref, dataId in zip(scaffolding.refs, scaffolding.dataIds):
                        if quantumDataId.matches(dataId):
                            if ref.id is None:
                                allOutputsPresent = False
                            else:
                                assert skipExisting, "Existing outputs should have already been identified."
                                if not allOutputsPresent:
                                    raise OutputExistsError(f"Output {datasetType.name} with data ID "
                                                            f"{dataId} already exists, but other outputs "
                                                            f"for task with label {task.taskDef.label} "
                                                            f"and data ID {quantumDataId} do not.")
                            outputs[datasetType].append(ref)
                if allOutputsPresent and skipExisting:
                    continue

                # Look up prerequisite datasets in the input collection(s).
                # These may have dimensions that extend beyond those we queried
                # for originally, because we want to permit those data ID
                # values to differ across quanta and dataset types.
                # For example, the same quantum may have a flat and bias with
                # a different calibration_label, or a refcat with a skypix
                # value that overlaps the quantum's data ID's region, but not
                # the user expression used for the initial query.
                for datasetType, scaffolding in task.prerequisites.items():
                    refs = list(
                        registry.queryDatasets(
                            datasetType,
                            collections=inputCollections[datasetType.name],
                            dataId=quantumDataId,
                            deduplicate=True,
                            expand=True,
                        )
                    )
                    inputs[datasetType] = refs
                task.addQuantum(
                    Quantum(
                        taskName=task.taskDef.taskName,
                        taskClass=task.taskDef.taskClass,
                        dataId=quantumDataId,
                        initInputs=task.initInputs.unpackRefs(),
                        predictedInputs=inputs,
                        outputs=outputs,
                    )
                )
Example #18
    def _makeGraph(self, taskDatasets, required, optional, prerequisite,
                   initInputs, initOutputs, originInfo, userQuery,
                   perDatasetTypeDimensions=()):
        """Make QuantumGraph instance.

        Parameters
        ----------
        taskDatasets : sequence of `_TaskDatasetTypes`
            Tasks with their inputs and outputs.
        required : `set` of `~lsst.daf.butler.DatasetType`
            Datasets that must exist in the repository in order to generate
            a QuantumGraph node that consumes them.
        optional : `set` of `~lsst.daf.butler.DatasetType`
            Datasets that will be produced by the graph, but may exist in
            the repository.  If ``self.skipExisting`` and all outputs of a
            particular node already exist, it will be skipped.  Otherwise
            pre-existing datasets of these types will cause
            `OutputExistsError` to be raised.
        prerequisite : `set` of `~lsst.daf.butler.DatasetType`
            Datasets that must exist in the repository, but whose absence
            should cause `PrerequisiteMissingError` to be raised if they
            are needed by any graph node that would otherwise be created.
        initInputs : `set` of `DatasetType`
            Datasets which should exist in input repository, and will be used
            in task initialization
        initOutputs : `set` of `DatasetType`
            Datasets which will be created in task initialization
        originInfo : `DatasetOriginInfo`
            Object which provides names of the input/output collections.
        userQuery : `str`
            String which defines user-defined selection for registry; should be
            empty or `None` if there are no restrictions on data selection.
        perDatasetTypeDimensions : iterable of `Dimension` or `str`
            Dimensions (or names thereof) that may have different values for
            different dataset types within the same quantum.

        Returns
        -------
        `QuantumGraph` instance.
        """
        rows = self.registry.selectMultipleDatasetTypes(
            originInfo, userQuery,
            required=required, optional=optional, prerequisite=prerequisite,
            perDatasetTypeDimensions=perDatasetTypeDimensions
        )

        # store result locally for multi-pass algorithm below
        # TODO: change it to single pass
        dimensionVerse = []
        try:
            for row in rows:
                _LOG.debug("row: %s", row)
                dimensionVerse.append(row)
        except LookupError as err:
            raise PrerequisiteMissingError(str(err)) from err

        # Next step is to group by task quantum dimensions
        qgraph = QuantumGraph()
        qgraph._inputDatasetTypes = (required | prerequisite)
        qgraph._outputDatasetTypes = optional
        for dsType in initInputs:
            for collection in originInfo.getInputCollections(dsType.name):
                result = self.registry.find(collection, dsType)
                if result is not None:
                    qgraph.initInputs.append(result)
                    break
            else:
                raise GraphBuilderError(f"Could not find initInput {dsType.name} in any input"
                                        " collection")
        for dsType in initOutputs:
            qgraph.initOutputs.append(DatasetRef(dsType, {}))

        for taskDss in taskDatasets:
            taskQuantaInputs = {}    # key is the quantum dataId (as tuple)
            taskQuantaOutputs = {}   # key is the quantum dataId (as tuple)
            qlinks = []
            for dimensionName in taskDss.taskDef.config.quantum.dimensions:
                dimension = self.dimensions[dimensionName]
                qlinks += dimension.links()
            _LOG.debug("task %s qdimensions: %s", taskDss.taskDef.label, qlinks)

            # some rows will be non-unique for a subset of dimensions; create
            # a temporary structure to remove duplicates
            for row in dimensionVerse:
                qkey = tuple((col, row.dataId[col]) for col in qlinks)
                _LOG.debug("qkey: %s", qkey)

                def _datasetRefKey(datasetRef):
                    return tuple(sorted(datasetRef.dataId.items()))

                qinputs = taskQuantaInputs.setdefault(qkey, {})
                for dsType in taskDss.inputs:
                    datasetRefs = qinputs.setdefault(dsType, {})
                    datasetRef = row.datasetRefs[dsType]
                    datasetRefs[_datasetRefKey(datasetRef)] = datasetRef
                    _LOG.debug("add input datasetRef: %s %s", dsType.name, datasetRef)

                qoutputs = taskQuantaOutputs.setdefault(qkey, {})
                for dsType in taskDss.outputs:
                    datasetRefs = qoutputs.setdefault(dsType, {})
                    datasetRef = row.datasetRefs[dsType]
                    datasetRefs[_datasetRefKey(datasetRef)] = datasetRef
                    _LOG.debug("add output datasetRef: %s %s", dsType.name, datasetRef)

            # all nodes for this task
            quanta = []
            for qkey in taskQuantaInputs:
                # taskQuantaInputs and taskQuantaOutputs have the same keys
                _LOG.debug("make quantum for qkey: %s", qkey)
                quantum = Quantum(run=None, task=None)

                # add all outputs, but check first that outputs don't exist
                outputs = list(chain.from_iterable(datasetRefs.values()
                                                   for datasetRefs in taskQuantaOutputs[qkey].values()))
                for ref in outputs:
                    _LOG.debug("add output: %s", ref)
                if self.skipExisting and all(ref.id is not None for ref in outputs):
                    _LOG.debug("all output datasetRefs already exist, skip quantum")
                    continue
                if any(ref.id is not None for ref in outputs):
                    # some outputs exist, can't override them
                    raise OutputExistsError(taskDss.taskDef.taskName, outputs)

                for ref in outputs:
                    quantum.addOutput(ref)

                # add all inputs
                for datasetRefs in taskQuantaInputs[qkey].values():
                    for ref in datasetRefs.values():
                        quantum.addPredictedInput(ref)
                        _LOG.debug("add input: %s", ref)

                quanta.append(quantum)

            qgraph.append(QuantumGraphTaskNodes(taskDss.taskDef, quanta))

        return qgraph
Example #19
    def _makeGraph(self, taskDatasets, inputs, outputs, initInputs, initOutputs, originInfo, userQuery):
        """Make QuantumGraph instance.

        Parameters
        ----------
        taskDatasets : sequence of `_TaskDatasetTypes`
            Tasks with their inputs and outputs.
        inputs : `set` of `DatasetType`
            Datasets which should already exist in input repository
        outputs : `set` of `DatasetType`
            Datasets which will be created by tasks
        initInputs : `set` of `DatasetType`
            Datasets which should exist in input repository, and will be used
            in task initialization
        initOutputs : `set` of `DatasetType`
            Datasets which will be created in task initialization
        originInfo : `DatasetOriginInfo`
            Object which provides names of the input/output collections.
        userQuery : `str`
            String which defines user-defined selection for registry; should be
            empty or `None` if there are no restrictions on data selection.

        Returns
        -------
        `QuantumGraph` instance.
        """
        parsedQuery = self._parseUserQuery(userQuery or "")
        expr = None if parsedQuery is None else str(parsedQuery)
        rows = self.registry.selectDimensions(originInfo, expr, inputs, outputs)

        # store result locally for multi-pass algorithm below
        # TODO: change it to single pass
        dimensionVerse = []
        for row in rows:
            _LOG.debug("row: %s", row)
            dimensionVerse.append(row)

        # Next step is to group by task quantum dimensions
        qgraph = QuantumGraph()
        qgraph._inputDatasetTypes = inputs
        qgraph._outputDatasetTypes = outputs
        for dsType in initInputs:
            for collection in originInfo.getInputCollections(dsType.name):
                result = self.registry.find(collection, dsType)
                if result is not None:
                    qgraph.initInputs.append(result)
                    break
            else:
                raise GraphBuilderError(f"Could not find initInput {dsType.name} in any input"
                                        " collection")
        for dsType in initOutputs:
            qgraph.initOutputs.append(DatasetRef(dsType, {}))

        for taskDss in taskDatasets:
            taskQuantaInputs = {}    # key is the quantum dataId (as tuple)
            taskQuantaOutputs = {}   # key is the quantum dataId (as tuple)
            qlinks = []
            for dimensionName in taskDss.taskDef.config.quantum.dimensions:
                dimension = self.dimensions[dimensionName]
                qlinks += dimension.link
            _LOG.debug("task %s qdimensions: %s", taskDss.taskDef.label, qlinks)

            # some rows will be non-unique for a subset of dimensions; create
            # a temporary structure to remove duplicates
            for row in dimensionVerse:
                qkey = tuple((col, row.dataId[col]) for col in qlinks)
                _LOG.debug("qkey: %s", qkey)

                def _dataRefKey(dataRef):
                    return tuple(sorted(dataRef.dataId.items()))

                qinputs = taskQuantaInputs.setdefault(qkey, {})
                for dsType in taskDss.inputs:
                    dataRefs = qinputs.setdefault(dsType, {})
                    dataRef = row.datasetRefs[dsType]
                    dataRefs[_dataRefKey(dataRef)] = dataRef
                    _LOG.debug("add input dataRef: %s %s", dsType.name, dataRef)

                qoutputs = taskQuantaOutputs.setdefault(qkey, {})
                for dsType in taskDss.outputs:
                    dataRefs = qoutputs.setdefault(dsType, {})
                    dataRef = row.datasetRefs[dsType]
                    dataRefs[_dataRefKey(dataRef)] = dataRef
                    _LOG.debug("add output dataRef: %s %s", dsType.name, dataRef)

            # Pre-flight does not fill dataset components, but graph users may
            # need them, so re-retrieve all input datasets to have their
            # components properly filled.
            for qinputs in taskQuantaInputs.values():
                for dataRefs in qinputs.values():
                    for key in dataRefs.keys():
                        if dataRefs[key].id is not None:
                            dataRefs[key] = self.registry.getDataset(dataRefs[key].id)

            # all nodes for this task
            quanta = []
            for qkey in taskQuantaInputs:
                # taskQuantaInputs and taskQuantaOutputs have the same keys
                _LOG.debug("make quantum for qkey: %s", qkey)
                quantum = Quantum(run=None, task=None)

                # add all outputs, but check first that outputs don't exist
                outputs = list(chain.from_iterable(dataRefs.values()
                                                   for dataRefs in taskQuantaOutputs[qkey].values()))
                for ref in outputs:
                    _LOG.debug("add output: %s", ref)
                if self.skipExisting and all(ref.id is not None for ref in outputs):
                    _LOG.debug("all output dataRefs already exist, skip quantum")
                    continue
                if any(ref.id is not None for ref in outputs):
                    # some outputs exist, can't override them
                    raise OutputExistsError(taskDss.taskDef.taskName, outputs)
                for ref in outputs:
                    quantum.addOutput(ref)

                # add all inputs
                for dataRefs in taskQuantaInputs[qkey].values():
                    for ref in dataRefs.values():
                        quantum.addPredictedInput(ref)
                        _LOG.debug("add input: %s", ref)

                quanta.append(quantum)

            qgraph.append(QuantumGraphNodes(taskDss.taskDef, quanta))

        return qgraph
Example #20
def makeQuantum(
    task: PipelineTask,
    butler: Butler,
    dataId: DataId,
    ioDataIds: Mapping[str, Union[DataId, Sequence[DataId]]],
) -> Quantum:
    """Create a Quantum for a particular data ID(s).

    Parameters
    ----------
    task : `lsst.pipe.base.PipelineTask`
        The task whose processing the quantum represents.
    butler : `lsst.daf.butler.Butler`
        The collection the quantum refers to.
    dataId : any data ID type
        The data ID of the quantum. Must have the same dimensions as
        ``task``'s connections class.
    ioDataIds : `collections.abc.Mapping` [`str`]
        A mapping keyed by input/output names. Values must be data IDs for
        single connections and sequences of data IDs for multiple connections.

    Returns
    -------
    quantum : `lsst.daf.butler.Quantum`
        A quantum for ``task``, when called with ``dataIds``.
    """
    connections = task.config.ConnectionsClass(config=task.config)

    try:
        _checkDimensionsMatch(butler.registry.dimensions,
                              connections.dimensions, dataId.keys())
    except ValueError as e:
        raise ValueError("Error in quantum dimensions.") from e

    inputs = defaultdict(list)
    outputs = defaultdict(list)
    for name in itertools.chain(connections.inputs,
                                connections.prerequisiteInputs):
        try:
            connection = connections.__getattribute__(name)
            _checkDataIdMultiplicity(name, ioDataIds[name],
                                     connection.multiple)
            ids = _normalizeDataIds(ioDataIds[name])
            for id in ids:
                ref = _refFromConnection(butler, connection, id)
                inputs[ref.datasetType].append(ref)
        except (ValueError, KeyError) as e:
            raise ValueError(f"Error in connection {name}.") from e
    for name in connections.outputs:
        try:
            connection = connections.__getattribute__(name)
            _checkDataIdMultiplicity(name, ioDataIds[name],
                                     connection.multiple)
            ids = _normalizeDataIds(ioDataIds[name])
            for id in ids:
                ref = _refFromConnection(butler, connection, id)
                outputs[ref.datasetType].append(ref)
        except (ValueError, KeyError) as e:
            raise ValueError(f"Error in connection {name}.") from e
    quantum = Quantum(
        taskClass=type(task),
        dataId=DataCoordinate.standardize(dataId,
                                          universe=butler.registry.dimensions),
        inputs=inputs,
        outputs=outputs,
    )
    return quantum
Example #21
    def _makeGraph(self,
                   taskDatasets,
                   required,
                   optional,
                   prerequisite,
                   initInputs,
                   initOutputs,
                   originInfo,
                   userQuery,
                   perDatasetTypeDimensions=()):
        """Make QuantumGraph instance.

        Parameters
        ----------
        taskDatasets : sequence of `_TaskDatasetTypes`
            Tasks with their inputs and outputs.
        required : `set` of `~lsst.daf.butler.DatasetType`
            Datasets that must exist in the repository in order to generate
            a QuantumGraph node that consumes them.
        optional : `set` of `~lsst.daf.butler.DatasetType`
            Datasets that will be produced by the graph, but may exist in
            the repository.  If ``self.skipExisting`` and all outputs of a
            particular node already exist, it will be skipped.  Otherwise
            pre-existing datasets of these types will cause
            `OutputExistsError` to be raised.
        prerequisite : `set` of `~lsst.daf.butler.DatasetType`
            Datasets that must exist in the repository, but whose absence
            should cause `PrerequisiteMissingError` to be raised if they
            are needed by any graph node that would otherwise be created.
        initInputs : `set` of `DatasetType`
            Datasets which should exist in input repository, and will be used
            in task initialization
        initOutputs : `set` of `DatasetType`
            Datasets which will be created in task initialization
        originInfo : `DatasetOriginInfo`
            Object which provides names of the input/output collections.
        userQuery : `str`
            String which defines user-defined selection for registry; should be
            empty or `None` if there are no restrictions on data selection.
        perDatasetTypeDimensions : iterable of `Dimension` or `str`
            Dimensions (or names thereof) that may have different values for
            different dataset types within the same quantum.

        Returns
        -------
        `QuantumGraph` instance.
        """
        rows = self.registry.selectMultipleDatasetTypes(
            originInfo,
            userQuery,
            required=required,
            optional=optional,
            prerequisite=prerequisite,
            perDatasetTypeDimensions=perDatasetTypeDimensions)

        # store result locally for multi-pass algorithm below
        # TODO: change it to single pass
        dimensionVerse = []
        try:
            for row in rows:
                _LOG.debug("row: %s", row)
                dimensionVerse.append(row)
        except LookupError as err:
            raise PrerequisiteMissingError(str(err)) from err

        # Next step is to group by task quantum dimensions
        qgraph = QuantumGraph()
        qgraph._inputDatasetTypes = (required | prerequisite)
        qgraph._outputDatasetTypes = optional
        for dsType in initInputs:
            for collection in originInfo.getInputCollections(dsType.name):
                result = self.registry.find(collection, dsType)
                if result is not None:
                    qgraph.initInputs.append(result)
                    break
            else:
                raise GraphBuilderError(
                    f"Could not find initInput {dsType.name} in any input"
                    " collection")
        for dsType in initOutputs:
            qgraph.initOutputs.append(DatasetRef(dsType, {}))

        for taskDss in taskDatasets:
            taskQuantaInputs = {}  # key is the quantum dataId (as tuple)
            taskQuantaOutputs = {}  # key is the quantum dataId (as tuple)
            qlinks = []
            for dimensionName in taskDss.taskDef.config.quantum.dimensions:
                dimension = self.dimensions[dimensionName]
                qlinks += dimension.links()
            _LOG.debug("task %s qdimensions: %s", taskDss.taskDef.label,
                       qlinks)

            # some rows will be non-unique for a subset of dimensions; create
            # a temporary structure to remove duplicates
            for row in dimensionVerse:
                qkey = tuple((col, row.dataId[col]) for col in qlinks)
                _LOG.debug("qkey: %s", qkey)

                def _datasetRefKey(datasetRef):
                    return tuple(sorted(datasetRef.dataId.items()))

                qinputs = taskQuantaInputs.setdefault(qkey, {})
                for dsType in taskDss.inputs:
                    datasetRefs = qinputs.setdefault(dsType, {})
                    datasetRef = row.datasetRefs[dsType]
                    datasetRefs[_datasetRefKey(datasetRef)] = datasetRef
                    _LOG.debug("add input datasetRef: %s %s", dsType.name,
                               datasetRef)

                qoutputs = taskQuantaOutputs.setdefault(qkey, {})
                for dsType in taskDss.outputs:
                    datasetRefs = qoutputs.setdefault(dsType, {})
                    datasetRef = row.datasetRefs[dsType]
                    datasetRefs[_datasetRefKey(datasetRef)] = datasetRef
                    _LOG.debug("add output datasetRef: %s %s", dsType.name,
                               datasetRef)

            # all nodes for this task
            quanta = []
            for qkey in taskQuantaInputs:
                # taskQuantaInputs and taskQuantaOutputs have the same keys
                _LOG.debug("make quantum for qkey: %s", qkey)
                quantum = Quantum(run=None, task=None)

                # add all outputs, but check first that outputs don't exist
                outputs = list(
                    chain.from_iterable(
                        datasetRefs.values()
                        for datasetRefs in taskQuantaOutputs[qkey].values()))
                for ref in outputs:
                    _LOG.debug("add output: %s", ref)
                if self.skipExisting and all(ref.id is not None
                                             for ref in outputs):
                    _LOG.debug(
                        "all output datasetRefs already exist, skip quantum")
                    continue
                if any(ref.id is not None for ref in outputs):
                    # some outputs exist, can't override them
                    raise OutputExistsError(taskDss.taskDef.taskName, outputs)

                for ref in outputs:
                    quantum.addOutput(ref)

                # add all inputs
                for datasetRefs in taskQuantaInputs[qkey].values():
                    for ref in datasetRefs.values():
                        quantum.addPredictedInput(ref)
                        _LOG.debug("add input: %s", ref)

                quanta.append(quantum)

            qgraph.append(QuantumGraphTaskNodes(taskDss.taskDef, quanta))

        return qgraph
Example #22
def _pruner(
    datasetRefDict: _DatasetTracker[DatasetRef, QuantumNode],
    refsToRemove: Iterable[DatasetRef],
    *,
    alreadyPruned: Optional[Set[QuantumNode]] = None,
) -> None:
    r"""Prune supplied dataset refs out of datasetRefDict container, recursing
    to additional nodes dependent on pruned refs. This function modifies
    datasetRefDict in-place.

    Parameters
    ----------
    datasetRefDict : `_DatasetTracker[DatasetRef, QuantumNode]`
        The dataset tracker that maps `DatasetRef`\ s to the Quantum Nodes
        that produce/consume that `DatasetRef`
    refsToRemove : `Iterable` of `DatasetRef`
        The `DatasetRef`\ s which should be pruned from the input dataset
        tracker
    alreadyPruned : `set` of `QuantumNode`
        A set of nodes which have been pruned from the dataset tracker
    """
    if alreadyPruned is None:
        alreadyPruned = set()
    for ref in refsToRemove:
        # make a copy here, because this structure will be modified during
        # recursion; hitting a node more than once is not much of an issue,
        # as we skip anything that has already been processed
        nodes = set(datasetRefDict.getConsumers(ref))
        for node in nodes:
            # This node will never be associated with this ref
            datasetRefDict.removeConsumer(ref, node)
            if node in alreadyPruned:
                continue
            # find the connection corresponding to the input ref
            connectionRefs = node.quantum.inputs.get(ref.datasetType)
            if connectionRefs is None:
                # look to see if any inputs are component refs that match the
                # input ref to prune
                others = ref.datasetType.makeAllComponentDatasetTypes()
                # for each other component type, check if there are associated
                # refs
                for other in others:
                    connectionRefs = node.quantum.inputs.get(other)
                    if connectionRefs is not None:
                        # now search the component refs and see which one
                        # matches the ref to trim
                        for cr in connectionRefs:
                            if cr.makeCompositeRef() == ref:
                                toRemove = cr
                        break
                else:
                    # Ref must be an initInput ref and we want to ignore those
                    raise RuntimeError(
                        f"Cannot prune on non-Input dataset type {ref.datasetType.name}"
                    )
            else:
                toRemove = ref

            tmpRefs = set(connectionRefs).difference((toRemove, ))
            tmpConnections = NamedKeyDict[DatasetType, List[DatasetRef]](
                node.quantum.inputs.items())
            tmpConnections[toRemove.datasetType] = list(tmpRefs)
            helper = AdjustQuantumHelper(inputs=tmpConnections,
                                         outputs=node.quantum.outputs)
            assert node.quantum.dataId is not None, (
                "assert to make the type checker happy, it should not "
                "actually be possible to not have dataId set to None "
                "at this point")

            # Try to adjust the quantum with the reduced refs to make sure the
            # node will still satisfy all its conditions.
            #
            # If it can't because NoWorkFound is raised, that means a
            # connection is no longer present, and the node should be removed
            # from the graph.
            try:
                helper.adjust_in_place(node.taskDef.connections,
                                       node.taskDef.label, node.quantum.dataId)
                newQuantum = Quantum(
                    taskName=node.quantum.taskName,
                    taskClass=node.quantum.taskClass,
                    dataId=node.quantum.dataId,
                    initInputs=node.quantum.initInputs,
                    inputs=helper.inputs,
                    outputs=helper.outputs,
                )
                # If the inputs or outputs were adjusted to something different
                # than what was supplied by the graph builder, disassociate
                # node from those refs, and if they are output refs, prune them
                # from downstream tasks. This means that based on new inputs
                # the task wants to produce fewer outputs, or consume fewer
                # inputs.
                for condition, existingMapping, newMapping, remover in (
                    (
                        helper.inputs_adjusted,
                        node.quantum.inputs,
                        helper.inputs,
                        datasetRefDict.removeConsumer,
                    ),
                    (
                        helper.outputs_adjusted,
                        node.quantum.outputs,
                        helper.outputs,
                        datasetRefDict.removeProducer,
                    ),
                ):
                    if condition:
                        notNeeded = set()
                        for key in existingMapping:
                            if key not in newMapping:
                                compositeRefs = (r if not r.isComponent() else
                                                 r.makeCompositeRef()
                                                 for r in existingMapping[key])
                                notNeeded |= set(compositeRefs)
                                continue
                            notNeeded |= set(existingMapping[key]) - set(
                                newMapping[key])
                        if notNeeded:
                            for ref in notNeeded:
                                if ref.isComponent():
                                    ref = ref.makeCompositeRef()
                                remover(ref, node)
                            if remover is datasetRefDict.removeProducer:
                                _pruner(datasetRefDict,
                                        notNeeded,
                                        alreadyPruned=alreadyPruned)
                object.__setattr__(node, "quantum", newQuantum)
                noWorkFound = False

            except NoWorkFound:
                noWorkFound = True

            if noWorkFound:
                # This will throw if the length is less than the minimum number
                for tmpRef in chain(
                        chain.from_iterable(node.quantum.inputs.values()),
                        node.quantum.initInputs.values()):
                    if tmpRef.isComponent():
                        tmpRef = tmpRef.makeCompositeRef()
                    datasetRefDict.removeConsumer(tmpRef, node)
                alreadyPruned.add(node)
                # prune all outputs produced by this node
                # mark that none of these will be produced
                forwardPrunes = set()
                for forwardRef in chain.from_iterable(
                        node.quantum.outputs.values()):
                    datasetRefDict.removeProducer(forwardRef, node)
                    forwardPrunes.add(forwardRef)
                _pruner(datasetRefDict,
                        forwardPrunes,
                        alreadyPruned=alreadyPruned)
Example #23
    def build_quantum_graph(
        cls,
        task_def,
        registry,
        constraint_order,
        constraint_ranges,
        where=None,
        collections=None,
    ):
        """Generate a `QuantumGraph` for running just this task.

        This is a temporary workaround for incomplete butler query support for
        HEALPix dimensions.

        Parameters
        ----------
        task_def : `lsst.pipe.base.TaskDef`
            Task definition.
        registry : `lsst.daf.butler.Registry`
            Client for the butler database.  May be read-only.
        constraint_order : `int`
            HEALPix order used to constrain which quanta are generated, via
            ``constraint_ranges``.  This should be a coarser grid (smaller
            order) than the order used for the task's quantum and output data
            IDs, and ideally something between the spatial scale of a patch or
            the data repository's "common skypix" system (usually ``htm7``).
        constraint_ranges : `lsst.sphgeom.RangeSet`
            RangeSet which describes constraint pixels (HEALPix NEST, with order
            constraint_order) to constrain generated quanta.
        where : `str`, optional
            A boolean `str` expression of the form accepted by
            `Registry.queryDatasets` to constrain input datasets.  This may
            contain a constraint on tracts, patches, or bands, but not HEALPix
            indices.  Constraints on tracts and patches should usually be
            unnecessary, however - existing coadds that overlap the given
            HEALPix indices will be selected without such a constraint, and
            providing one may reject some that should normally be included.
        collections : `str` or `Iterable` [ `str` ], optional
            Collection or collections to search for input datasets, in order.
            If not provided, ``registry.defaults.collections`` will be
            searched.
        """
        config = task_def.config

        dataset_types = pipeBase.PipelineDatasetTypes.fromPipeline(
            pipeline=[task_def], registry=registry)
        # Since we know this is the only task in the pipeline, we know there
        # is exactly one overall input dataset type; the output dataset types
        # are handled below.
        (input_dataset_type, ) = dataset_types.inputs

        # Extract the main output dataset type (which needs multiple
        # DatasetRefs, and tells us the output HPX level), and make a set of
        # what remains for more mechanical handling later.
        output_dataset_type = dataset_types.outputs[
            task_def.connections.hips_exposures.name]
        incidental_output_dataset_types = dataset_types.outputs.copy()
        incidental_output_dataset_types.remove(output_dataset_type)
        (hpx_output_dimension, ) = (d for d in output_dataset_type.dimensions
                                    if isinstance(d, SkyPixDimension))

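        # Pixelization objects for the coarse constraint HEALPix grid and for
        # the repository's common skypix system (used in the WHERE expression
        # below).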
        constraint_hpx_pixelization = registry.dimensions[
            f"healpix{constraint_order}"].pixelization
        common_skypix_name = registry.dimensions.commonSkyPix.name
        common_skypix_pixelization = registry.dimensions.commonSkyPix.pixelization

        # We will need all the pixels at the quantum resolution as well
        task_dimensions = registry.dimensions.extract(
            task_def.connections.dimensions)
        (hpx_dimension, ) = (d for d in task_dimensions if d.name != "band")
        hpx_pixelization = hpx_dimension.pixelization

        if hpx_pixelization.level < constraint_order:
            raise ValueError(
                f"Quantum order {hpx_pixelization.level} must be >= "
                f"constraint order {constraint_order}"
            )
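        # Scale the constraint ranges from the coarse constraint order up to
        # the quantum order; in NEST ordering each constraint pixel maps to
        # 4**(level difference) quantum pixels.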
        hpx_ranges = constraint_ranges.scaled(4**(hpx_pixelization.level -
                                                  constraint_order))

        # We can be generous in looking for pixels here, because we constrain
        # by actual patch regions below.
        common_skypix_ranges = RangeSet()
        for begin, end in constraint_ranges:
            for hpx_index in range(begin, end):
                constraint_hpx_region = constraint_hpx_pixelization.pixel(
                    hpx_index)
                common_skypix_ranges |= common_skypix_pixelization.envelope(
                    constraint_hpx_region)

        # To keep the query from getting out of hand (and breaking), simplify
        # until there are fewer than 100 ranges, which seems to work fine.
        for simp in range(1, 10):
            if len(common_skypix_ranges) < 100:
                break
            common_skypix_ranges.simplify(simp)

        # Use that RangeSet to assemble a WHERE constraint expression.  This
        # could definitely get too big if the "constraint healpix" order is too
        # fine.
        where_terms = []
        bind = {}
        for n, (begin, end) in enumerate(common_skypix_ranges):
            stop = end - 1  # registry range syntax is inclusive
            if begin == stop:
                where_terms.append(f"{common_skypix_name} = cpx{n}")
                bind[f"cpx{n}"] = begin
            else:
                where_terms.append(
                    f"({common_skypix_name} >= cpx{n}a AND {common_skypix_name} <= cpx{n}b)"
                )
                bind[f"cpx{n}a"] = begin
                bind[f"cpx{n}b"] = stop
        if where is None:
            where = " OR ".join(where_terms)
        else:
            where = f"({where}) AND ({' OR '.join(where_terms)})"
        # Query for input datasets with this constraint, and ask for expanded
        # data IDs because we want regions.  Immediately group this by patch so
        # we don't do later geometric stuff n_bands more times than we need to.
        input_refs = registry.queryDatasets(input_dataset_type,
                                            where=where,
                                            findFirst=True,
                                            collections=collections,
                                            bind=bind).expanded()
        inputs_by_patch = defaultdict(set)
        patch_dimensions = registry.dimensions.extract(["patch"])
        for input_ref in input_refs:
            inputs_by_patch[input_ref.dataId.subset(patch_dimensions)].add(
                input_ref)
        if not inputs_by_patch:
            message_body = '\n'.join(input_refs.explain_no_results())
            raise RuntimeError(f"No inputs found:\n{message_body}")

        # Iterate over patches and compute the set of output healpix pixels
        # that overlap each one.  Use that to associate inputs with output
        # pixels, but only for the output pixels we've already identified.
        inputs_by_hpx = defaultdict(set)
        for patch_data_id, input_refs_for_patch in inputs_by_patch.items():
            patch_hpx_ranges = hpx_pixelization.envelope(patch_data_id.region)
            for begin, end in patch_hpx_ranges & hpx_ranges:
                for hpx_index in range(begin, end):
                    inputs_by_hpx[hpx_index].update(input_refs_for_patch)
        # Iterate over the dict we just created and create the actual quanta.
        quanta = []
        for hpx_index, input_refs_for_hpx_index in inputs_by_hpx.items():
            # Group inputs by band.
            input_refs_by_band = defaultdict(list)
            for input_ref in input_refs_for_hpx_index:
                input_refs_by_band[input_ref.dataId["band"]].append(input_ref)
            # Iterate over bands to make quanta.
            for band, input_refs_for_band in input_refs_by_band.items():
                data_id = registry.expandDataId({
                    hpx_dimension: hpx_index,
                    "band": band
                })

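                # Scale this single quantum pixel up to the output order
                # (config.hips_order) to enumerate every output pixel it
                # covers.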
                hpx_pixel_ranges = RangeSet(hpx_index)
                hpx_output_ranges = hpx_pixel_ranges.scaled(
                    4**(config.hips_order - hpx_pixelization.level))
                output_data_ids = []
                for begin, end in hpx_output_ranges:
                    for hpx_output_index in range(begin, end):
                        output_data_ids.append(
                            registry.expandDataId({
                                hpx_output_dimension: hpx_output_index,
                                "band": band
                            }))
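                # Incidental outputs get a single ref at the quantum data ID;
                # the main output dataset type gets one ref per output pixel.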
                outputs = {
                    dt: [DatasetRef(dt, data_id)]
                    for dt in incidental_output_dataset_types
                }
                outputs[output_dataset_type] = [
                    DatasetRef(output_dataset_type, data_id)
                    for data_id in output_data_ids
                ]
                quanta.append(
                    Quantum(
                        taskName=task_def.taskName,
                        taskClass=task_def.taskClass,
                        dataId=data_id,
                        initInputs={},
                        inputs={input_dataset_type: input_refs_for_band},
                        outputs=outputs,
                    ))

        if len(quanta) == 0:
            raise RuntimeError(
                "Given constraints yielded an empty quantum graph.")

        return pipeBase.QuantumGraph(quanta={task_def: quanta})
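
A minimal driver sketch for the method above. This is hedged: the repository
path, collection name, pixel indices, and the ``HipsTask`` name standing in for
whichever class defines ``build_quantum_graph`` are all placeholders, and
``task_def`` is assumed to have been built elsewhere.

    from lsst.daf.butler import Butler
    from lsst.sphgeom import RangeSet

    butler = Butler("/path/to/repo")        # placeholder repository
    constraint_order = 6                    # coarse HEALPix order used only for the constraint
    constraint_ranges = RangeSet()
    for pixel in (10240, 10241, 10300):     # placeholder HEALPix NEST indices at constraint_order
        constraint_ranges |= RangeSet(pixel)

    qgraph = HipsTask.build_quantum_graph(  # placeholder class exposing the classmethod above
        task_def,                           # lsst.pipe.base.TaskDef for this task
        butler.registry,
        constraint_order,
        constraint_ranges,
        collections=["some/input/collection"],  # placeholder input collection
    )
    # qgraph can now be persisted or executed with the usual middleware tooling.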