Example #1
    def testRunQuantum(self):
        inputId = {
            "instrument": self.CAMERA_ID,
            "visit": self.VISIT_ID,
            "detector": self.CHIP_ID,
        }

        butler = self._makeButler()
        # self.task.config not persistable because it refers to a local class
        # We don't actually use the persisted config, so just make a new one
        butler.put(self.task.ConfigClass(), "apdb_marker", inputId)

        quantum = Quantum(taskClass=self.taskClass)
        quantum.addPredictedInput(
            ref_from_connection(butler, self.connections.dbInfo, inputId))
        quantum.addOutput(
            ref_from_connection(butler, self.connections.measurement, {
                "instrument": self.CAMERA_ID,
            }))

        run_quantum(self.task, butler, quantum)

        # Did output data ID get passed to DummyTask.run?
        measurement = butler.get(self.connections.measurement.name,
                                 instrument=self.CAMERA_ID)
        self.assertEqual(measurement.quantity,
                         len(self.CAMERA_ID) * u.dimensionless_unscaled)
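
Example #1 leans on several names defined elsewhere in its test module. A plausible import block, assuming ref_from_connection and run_quantum are local test helpers and u is astropy's units module (assumptions, not the original file's header):

    # Hypothetical imports for the test above; ref_from_connection and
    # run_quantum are assumed to be local test utilities rather than a
    # documented public API.
    import astropy.units as u
    from lsst.daf.butler import Quantum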
Example #2
    def testQuantum(self):
        registry = self.makeRegistry()
        if not registry.limited:
            registry.addDimensionEntry("instrument",
                                       {"instrument": "DummyCam"})
        run = registry.makeRun(collection="test")
        storageClass = StorageClass("testQuantum")
        registry.storageClasses.registerStorageClass(storageClass)
        # Make two predicted inputs
        datasetType1 = DatasetType(name="dst1",
                                   dimensions=registry.dimensions.extract(
                                       ("instrument", )),
                                   storageClass=storageClass)
        registry.registerDatasetType(datasetType1)
        ref1 = registry.addDataset(datasetType1,
                                   dataId={"instrument": "DummyCam"},
                                   run=run)
        datasetType2 = DatasetType(name="dst2",
                                   dimensions=registry.dimensions.extract(
                                       ("instrument", )),
                                   storageClass=storageClass)
        registry.registerDatasetType(datasetType2)
        ref2 = registry.addDataset(datasetType2,
                                   dataId={"instrument": "DummyCam"},
                                   run=run)
        # Create and add a Quantum
        quantum = Quantum(run=run,
                          task="some.fully.qualified.SuperTask",
                          startTime=datetime(2018, 1, 1),
                          endTime=datetime(2018, 1, 2),
                          host="localhost")
        quantum.addPredictedInput(ref1)
        quantum.addPredictedInput(ref2)
        # The Quantum is not yet in the Registry, so its inputs can't be
        # marked as actually used
        with self.assertRaises(KeyError):
            registry.markInputUsed(quantum, ref1)
        registry.addQuantum(quantum)
        # Now we can
        registry.markInputUsed(quantum, ref1)
        outQuantum = registry.getQuantum(quantum.id)
        self.assertEqual(outQuantum, quantum)
        # Removing a predictedInput dataset should be enough to remove the
        # Quantum; we don't want to allow Quanta with inaccurate information
        # to exist.
        registry.removeDataset(ref1)
        self.assertIsNone(registry.getQuantum(quantum.id))
Example #3
    def _makeQuanta(self, config):
        """Create set of Quanta
        """
        run = Run(collection=1, environment=None, pipeline=None)

        descriptor = pipeBase.DatasetTypeDescriptor.fromConfig(config.input)
        dstype0 = descriptor.datasetType
        descriptor = pipeBase.DatasetTypeDescriptor.fromConfig(config.output)
        dstype1 = descriptor.datasetType

        quanta = []
        for visit in range(100):
            quantum = Quantum(run=run, task=None)
            quantum.addPredictedInput(self._makeDSRefVisit(dstype0, visit))
            quantum.addOutput(self._makeDSRefVisit(dstype1, visit))
            quanta.append(quantum)

        return quanta
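
None of these snippets define _makeDSRefVisit. A minimal sketch consistent with how it is called here, hypothetical rather than the project's actual helper, might be:

    def _makeDSRefVisit(self, dstype, visitId):
        # Hypothetical helper: build a DatasetRef for a single visit of the
        # dummy instrument used throughout these tests.
        return DatasetRef(dstype, dataId={"instrument": "DummyCam",
                                          "visit": visitId})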
Example #4
    def _makeQuanta(self, config):
        """Create set of Quanta
        """
        universe = DimensionUniverse()
        run = Run(collection=1, environment=None, pipeline=None)
        connections = config.connections.ConnectionsClass(config=config)

        dstype0 = connections.input.makeDatasetType(universe)
        dstype1 = connections.output.makeDatasetType(universe)

        quanta = []
        for visit in range(100):
            quantum = Quantum(run=run)
            quantum.addPredictedInput(
                self._makeDSRefVisit(dstype0, visit, universe))
            quantum.addOutput(self._makeDSRefVisit(dstype1, visit, universe))
            quanta.append(quantum)

        return quanta
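
This variant also passes the DimensionUniverse through to _makeDSRefVisit; a matching hypothetical signature would simply accept it:

    def _makeDSRefVisit(self, dstype, visitId, universe):
        # Hypothetical: as above, with the universe available in case the
        # dataId must be standardized against it.
        return DatasetRef(dstype, dataId={"instrument": "DummyCam",
                                          "visit": visitId})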
Example #5
    def _makeQuanta(self, config):
        """Create set of Quanta
        """
        universe = DimensionUniverse.fromConfig()
        run = Run(collection=1, environment=None, pipeline=None)

        descriptor = pipeBase.DatasetTypeDescriptor.fromConfig(config.input)
        dstype0 = descriptor.makeDatasetType(universe)
        descriptor = pipeBase.DatasetTypeDescriptor.fromConfig(config.output)
        dstype1 = descriptor.makeDatasetType(universe)

        quanta = []
        for visit in range(100):
            quantum = Quantum(run=run, task=None)
            quantum.addPredictedInput(self._makeDSRefVisit(dstype0, visit))
            quantum.addOutput(self._makeDSRefVisit(dstype1, visit))
            quanta.append(quantum)

        return quanta
Example #6
    def testAddInputsOutputs(self):
        """Test of addPredictedInput() method.
        """
        quantum = Quantum(taskName="some.task.object", run=None)

        # start with empty
        self.assertEqual(quantum.predictedInputs, dict())
        universe = DimensionUniverse()
        instrument = "DummyCam"
        datasetTypeName = "test_ds"
        storageClass = StorageClass("testref_StructuredData")
        datasetType = DatasetType(datasetTypeName,
                                  universe.extract(("instrument", "visit")),
                                  storageClass)

        # add one ref
        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=42))
        quantum.addPredictedInput(ref)
        self.assertIn(datasetTypeName, quantum.predictedInputs)
        self.assertEqual(len(quantum.predictedInputs[datasetTypeName]), 1)
        # add second ref
        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=43))
        quantum.addPredictedInput(ref)
        self.assertEqual(len(quantum.predictedInputs[datasetTypeName]), 2)

        # mark last ref as actually used
        self.assertEqual(quantum.actualInputs, dict())
        quantum._markInputUsed(ref)
        self.assertIn(datasetTypeName, quantum.actualInputs)
        self.assertEqual(len(quantum.actualInputs[datasetTypeName]), 1)

        # add couple of outputs too
        self.assertEqual(quantum.outputs, dict())
        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=42))
        quantum.addOutput(ref)
        self.assertIn(datasetTypeName, quantum.outputs)
        self.assertEqual(len(quantum.outputs[datasetTypeName]), 1)

        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=43))
        quantum.addOutput(ref)
        self.assertEqual(len(quantum.outputs[datasetTypeName]), 2)
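
Example #6 pins down the Quantum bookkeeping contract. Condensed to just the calls exercised above (ref stands for any DatasetRef):

    # predictedInputs, actualInputs, and outputs each map a dataset type
    # name to the refs of that type; the calls below populate them.
    quantum = Quantum(taskName="some.task.object", run=None)
    quantum.addPredictedInput(ref)   # planned consumption
    quantum._markInputUsed(ref)      # recorded as actually read
    quantum.addOutput(ref)           # planned production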
Example #7
    def _makeGraph(self, taskDatasets, inputs, outputs, initInputs, initOutputs, originInfo, userQuery):
        """Make QuantumGraph instance.

        Parameters
        ----------
        taskDatasets : sequence of `_TaskDatasetTypes`
            Tasks with their inputs and outputs.
        inputs : `set` of `DatasetType`
            Datasets which should already exist in the input repository.
        outputs : `set` of `DatasetType`
            Datasets which will be created by tasks.
        initInputs : `set` of `DatasetType`
            Datasets which should exist in the input repository and will be
            used in task initialization.
        initOutputs : `set` of `DatasetType`
            Datasets which will be created in task initialization.
        originInfo : `DatasetOriginInfo`
            Object which provides the names of the input/output collections.
        userQuery : `str`
            String which defines a user-defined selection for the registry;
            should be empty or `None` if there are no restrictions on data
            selection.

        Returns
        -------
        `QuantumGraph` instance.
        """
        parsedQuery = self._parseUserQuery(userQuery or "")
        expr = None if parsedQuery is None else str(parsedQuery)
        rows = self.registry.selectDimensions(originInfo, expr, inputs, outputs)

        # store result locally for multi-pass algorithm below
        # TODO: change it to single pass
        dimensionVerse = []
        for row in rows:
            _LOG.debug("row: %s", row)
            dimensionVerse.append(row)

        # Next step is to group by task quantum dimensions
        qgraph = QuantumGraph()
        qgraph._inputDatasetTypes = inputs
        qgraph._outputDatasetTypes = outputs
        for dsType in initInputs:
            for collection in originInfo.getInputCollections(dsType.name):
                result = self.registry.find(collection, dsType)
                if result is not None:
                    qgraph.initInputs.append(result)
                    break
            else:
                raise GraphBuilderError(f"Could not find initInput {dsType.name} in any input"
                                        " collection")
        for dsType in initOutputs:
            qgraph.initOutputs.append(DatasetRef(dsType, {}))

        for taskDss in taskDatasets:
            taskQuantaInputs = {}    # key is the quantum dataId (as tuple)
            taskQuantaOutputs = {}   # key is the quantum dataId (as tuple)
            qlinks = []
            for dimensionName in taskDss.taskDef.config.quantum.dimensions:
                dimension = self.dimensions[dimensionName]
                qlinks += dimension.link
            _LOG.debug("task %s qdimensions: %s", taskDss.taskDef.label, qlinks)

            # some rows will be non-unique for subset of dimensions, create
            # temporary structure to remove duplicates
            for row in dimensionVerse:
                qkey = tuple((col, row.dataId[col]) for col in qlinks)
                _LOG.debug("qkey: %s", qkey)

                def _dataRefKey(dataRef):
                    return tuple(sorted(dataRef.dataId.items()))

                qinputs = taskQuantaInputs.setdefault(qkey, {})
                for dsType in taskDss.inputs:
                    dataRefs = qinputs.setdefault(dsType, {})
                    dataRef = row.datasetRefs[dsType]
                    dataRefs[_dataRefKey(dataRef)] = dataRef
                    _LOG.debug("add input dataRef: %s %s", dsType.name, dataRef)

                qoutputs = taskQuantaOutputs.setdefault(qkey, {})
                for dsType in taskDss.outputs:
                    dataRefs = qoutputs.setdefault(dsType, {})
                    dataRef = row.datasetRefs[dsType]
                    dataRefs[_dataRefKey(dataRef)] = dataRef
                    _LOG.debug("add output dataRef: %s %s", dsType.name, dataRef)

            # Pre-flight does not fill dataset components, but graph users
            # may need them, so re-retrieve all input datasets to have
            # their components properly filled.
            for qinputs in taskQuantaInputs.values():
                for dataRefs in qinputs.values():
                    for key in dataRefs.keys():
                        if dataRefs[key].id is not None:
                            dataRefs[key] = self.registry.getDataset(dataRefs[key].id)

            # all nodes for this task
            quanta = []
            for qkey in taskQuantaInputs:
                # taskQuantaInputs and taskQuantaOutputs have the same keys
                _LOG.debug("make quantum for qkey: %s", qkey)
                quantum = Quantum(run=None, task=None)

                # add all outputs, but check first that outputs don't exist
                outputs = list(chain.from_iterable(dataRefs.values()
                                                   for dataRefs in taskQuantaOutputs[qkey].values()))
                for ref in outputs:
                    _LOG.debug("add output: %s", ref)
                if self.skipExisting and all(ref.id is not None for ref in outputs):
                    _LOG.debug("all output dataRefs already exist, skip quantum")
                    continue
                if any(ref.id is not None for ref in outputs):
                    # some outputs exist, can't override them
                    raise OutputExistsError(taskDss.taskDef.taskName, outputs)
                for ref in outputs:
                    quantum.addOutput(ref)

                # add all inputs
                for dataRefs in taskQuantaInputs[qkey].values():
                    for ref in dataRefs.values():
                        quantum.addPredictedInput(ref)
                        _LOG.debug("add input: %s", ref)

                quanta.append(quantum)

            qgraph.append(QuantumGraphNodes(taskDss.taskDef, quanta))

        return qgraph
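
The _dataRefKey trick above is worth isolating: refs are deduplicated per quantum by keying each one on its sorted dataId items. A standalone sketch of just that pattern, using a stand-in for DatasetRef so it runs without any butler types:

    from collections import namedtuple

    Ref = namedtuple("Ref", ["dataId"])  # stand-in for DatasetRef

    def dedupe_by_data_id(refs):
        # Key each ref on its sorted dataId items, as _dataRefKey does
        # above, so repeated rows collapse to one entry per dataId.
        unique = {tuple(sorted(r.dataId.items())): r for r in refs}
        return list(unique.values())

    refs = [Ref({"visit": 1}), Ref({"visit": 1}), Ref({"visit": 2})]
    assert len(dedupe_by_data_id(refs)) == 2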
Example #8
    def _makeGraph(self,
                   taskDatasets,
                   required,
                   optional,
                   prerequisite,
                   initInputs,
                   initOutputs,
                   originInfo,
                   userQuery,
                   perDatasetTypeDimensions=()):
        """Make QuantumGraph instance.

        Parameters
        ----------
        taskDatasets : sequence of `_TaskDatasetTypes`
            Tasks with their inputs and outputs.
        required : `set` of `~lsst.daf.butler.DatasetType`
            Datasets that must exist in the repository in order to generate
            a QuantumGraph node that consumes them.
        optional : `set` of `~lsst.daf.butler.DatasetType`
            Datasets that will be produced by the graph, but may exist in
            the repository.  If ``self.skipExisting`` and all outputs of a
            particular node already exist, it will be skipped.  Otherwise
            pre-existing datasets of these types will cause
            `OutputExistsError` to be raised.
        prerequisite : `set` of `~lsst.daf.butler.DatasetType`
            Datasets that must exist in the repository, but whose absence
            should cause `PrerequisiteMissingError` to be raised if they
            are needed by any graph node that would otherwise be created.
        initInputs : `set` of `DatasetType`
            Datasets which should exist in the input repository and will be
            used in task initialization.
        initOutputs : `set` of `DatasetType`
            Datasets which will be created in task initialization.
        originInfo : `DatasetOriginInfo`
            Object which provides the names of the input/output collections.
        userQuery : `str`
            String which defines a user-defined selection for the registry;
            should be empty or `None` if there are no restrictions on data
            selection.
        perDatasetTypeDimensions : iterable of `Dimension` or `str`
            Dimensions (or names thereof) that may have different values for
            different dataset types within the same quantum.

        Returns
        -------
        `QuantumGraph` instance.
        """
        rows = self.registry.selectMultipleDatasetTypes(
            originInfo,
            userQuery,
            required=required,
            optional=optional,
            prerequisite=prerequisite,
            perDatasetTypeDimensions=perDatasetTypeDimensions)

        # store result locally for multi-pass algorithm below
        # TODO: change it to single pass
        dimensionVerse = []
        try:
            for row in rows:
                _LOG.debug("row: %s", row)
                dimensionVerse.append(row)
        except LookupError as err:
            raise PrerequisiteMissingError(str(err)) from err

        # Next step is to group by task quantum dimensions
        qgraph = QuantumGraph()
        qgraph._inputDatasetTypes = (required | prerequisite)
        qgraph._outputDatasetTypes = optional
        for dsType in initInputs:
            for collection in originInfo.getInputCollections(dsType.name):
                result = self.registry.find(collection, dsType)
                if result is not None:
                    qgraph.initInputs.append(result)
                    break
            else:
                raise GraphBuilderError(
                    f"Could not find initInput {dsType.name} in any input"
                    " collection")
        for dsType in initOutputs:
            qgraph.initOutputs.append(DatasetRef(dsType, {}))

        for taskDss in taskDatasets:
            taskQuantaInputs = {}  # key is the quantum dataId (as tuple)
            taskQuantaOutputs = {}  # key is the quantum dataId (as tuple)
            qlinks = []
            for dimensionName in taskDss.taskDef.config.quantum.dimensions:
                dimension = self.dimensions[dimensionName]
                qlinks += dimension.links()
            _LOG.debug("task %s qdimensions: %s", taskDss.taskDef.label,
                       qlinks)

            # some rows will be non-unique for subset of dimensions, create
            # temporary structure to remove duplicates
            for row in dimensionVerse:
                qkey = tuple((col, row.dataId[col]) for col in qlinks)
                _LOG.debug("qkey: %s", qkey)

                def _datasetRefKey(datasetRef):
                    return tuple(sorted(datasetRef.dataId.items()))

                qinputs = taskQuantaInputs.setdefault(qkey, {})
                for dsType in taskDss.inputs:
                    datasetRefs = qinputs.setdefault(dsType, {})
                    datasetRef = row.datasetRefs[dsType]
                    datasetRefs[_datasetRefKey(datasetRef)] = datasetRef
                    _LOG.debug("add input datasetRef: %s %s", dsType.name,
                               datasetRef)

                qoutputs = taskQuantaOutputs.setdefault(qkey, {})
                for dsType in taskDss.outputs:
                    datasetRefs = qoutputs.setdefault(dsType, {})
                    datasetRef = row.datasetRefs[dsType]
                    datasetRefs[_datasetRefKey(datasetRef)] = datasetRef
                    _LOG.debug("add output datasetRef: %s %s", dsType.name,
                               datasetRef)

            # all nodes for this task
            quanta = []
            for qkey in taskQuantaInputs:
                # taskQuantaInputs and taskQuantaOutputs have the same keys
                _LOG.debug("make quantum for qkey: %s", qkey)
                quantum = Quantum(run=None, task=None)

                # add all outputs, but check first that outputs don't exist
                outputs = list(
                    chain.from_iterable(
                        datasetRefs.values()
                        for datasetRefs in taskQuantaOutputs[qkey].values()))
                for ref in outputs:
                    _LOG.debug("add output: %s", ref)
                if self.skipExisting and all(ref.id is not None
                                             for ref in outputs):
                    _LOG.debug(
                        "all output datasetRefs already exist, skip quantum")
                    continue
                if any(ref.id is not None for ref in outputs):
                    # some outputs exist, can't override them
                    raise OutputExistsError(taskDss.taskDef.taskName, outputs)

                for ref in outputs:
                    quantum.addOutput(ref)

                # add all inputs
                for datasetRefs in taskQuantaInputs[qkey].values():
                    for ref in datasetRefs.values():
                        quantum.addPredictedInput(ref)
                        _LOG.debug("add input: %s", ref)

                quanta.append(quantum)

            qgraph.append(QuantumGraphTaskNodes(taskDss.taskDef, quanta))

        return qgraph
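
The skipExisting behaviour documented for the optional parameter reduces to a three-way decision on each candidate quantum's outputs. Restated as a tiny pure function, a paraphrase of the branch above with a generic exception standing in for OutputExistsError:

    def decide_quantum(outputs, skipExisting):
        # outputs: DatasetRefs whose .id is None until the dataset exists.
        outputs = list(outputs)
        if skipExisting and all(ref.id is not None for ref in outputs):
            return "skip"    # every output already exists; drop the quantum
        if any(ref.id is not None for ref in outputs):
            raise RuntimeError("some outputs exist, can't override them")
        return "create"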