Пример #1
0
 def __init__(self, taskDef: TaskDef, parent: _PipelineScaffolding,
              datasetTypes: TaskDatasetTypes):
     self.taskDef = taskDef
     self.dimensions = DimensionGraph(
         parent.dimensions.universe,
         names=taskDef.connections.dimensions)
     # A task's quantum dimensions must already appear in the pipeline's.
     assert self.dimensions.issubset(parent.dimensions)
     # Each per-task _DatasetDict is built as a restriction of the one or
     # two corresponding dicts held by the parent _PipelineScaffolding.
     self.initInputs = _DatasetDict.fromSubset(
         datasetTypes.initInputs, parent.initInputs, parent.initIntermediates)
     self.initOutputs = _DatasetDict.fromSubset(
         datasetTypes.initOutputs, parent.initIntermediates, parent.initOutputs)
     self.inputs = _DatasetDict.fromSubset(
         datasetTypes.inputs, parent.inputs, parent.intermediates)
     self.outputs = _DatasetDict.fromSubset(
         datasetTypes.outputs, parent.intermediates, parent.outputs)
     self.prerequisites = _DatasetDict.fromSubset(
         datasetTypes.prerequisites, parent.prerequisites)
     # Filled in later as quanta are generated.
     self.dataIds = set()
     self.quanta = {}
Пример #2
0
    def randomDimensionSubset(
            self,
            n: int = 3,
            graph: Optional[DimensionGraph] = None) -> DimensionGraph:
        """Generate a random `DimensionGraph` that has a subset of the
        dimensions in a given one.

        Parameters
        ----------
        n : `int`
            Number of dimensions to select, before automatic expansion by
            `DimensionGraph`.
        graph : `DimensionGraph`, optional
            Dimensions to select from.  Defaults to ``self.allDataIds.graph``.

        Returns
        -------
        selected : `DimensionGraph`
            At most ``n`` dimensions randomly selected from ``graph`` without
            replacement, plus any dimensions added by `DimensionGraph`'s
            automatic expansion.
        """
        if graph is None:
            graph = self.allDataIds.graph
        # Clamp the sample size to the population size: random.sample raises
        # ValueError when asked for more elements than are available, and the
        # previous max() selected *every* dimension whenever n <= len(graph),
        # which defeated the purpose of choosing a random proper subset.
        return DimensionGraph(graph.universe,
                              names=self.rng.sample(
                                  list(graph.dimensions.names),
                                  min(n, len(graph.dimensions))))
Пример #3
0
 def runDefineVisits(self, pool=None):
     """Group ingested exposures into visits, unless visit definition is
     disabled (``self.task.defineVisits is None``).
     """
     if self.task.defineVisits is None:
         # Visit definition was not configured; nothing to do.
         return
     exposureDimensions = DimensionGraph(self.task.universe, names=["exposure"])
     exposureDataIds = {ref.dataId.subset(exposureDimensions) for ref in self._rawRefs}
     self.task.log.info("Defining visits from exposures.")
     self.task.defineVisits.run(exposureDataIds, pool=pool)
Пример #4
0
 def testCalibrationDimensions(self):
     """Check the graph built from calibration-style dimensions
     (physical_filter + detector).
     """
     g = DimensionGraph(self.universe, names=("physical_filter", "detector"))
     # Required vs. implied split, and their union as the full dimension set.
     self.assertCountEqual(g.required.names,
                           ["instrument", "detector", "physical_filter"])
     self.assertCountEqual(g.implied.names, ["band"])
     self.assertCountEqual(g.dimensions.names,
                           ["instrument", "detector", "physical_filter", "band"])
     self.assertCountEqual(g.elements.names, g.dimensions.names)
     self.assertCountEqual(g.governors.names, {"instrument"})
Пример #5
0
 def testSubsetCalculation(self):
     """Test that independent spatial and temporal options are computed
     correctly.
     """
     names = ("visit", "detector", "tract", "patch", "htm7", "exposure")
     g = DimensionGraph(self.universe, names=names)
     self.assertCountEqual(g.spatial.names,
                           ["observation_regions", "skymap_regions", "htm"])
     self.assertCountEqual(g.temporal.names, ["observation_timespans"])
Пример #6
0
 def testSkyMapDimensions(self):
     """Check the graph built from the skymap "patch" dimension."""
     g = DimensionGraph(self.universe, names=("patch",))
     # "patch" pulls in its full key hierarchy; nothing is merely implied.
     expected = ["skymap", "tract", "patch"]
     self.assertCountEqual(g.dimensions.names, expected)
     self.assertCountEqual(g.required.names, expected)
     self.assertCountEqual(g.implied.names, [])
     self.assertCountEqual(g.elements.names, g.dimensions.names)
     self.assertCountEqual(g.spatial.names, ["skymap_regions"])
     self.assertCountEqual(g.governors.names, {"skymap"})
     self.assertEqual(g.spatial.names, {"skymap_regions"})
     self.assertEqual(next(iter(g.spatial)).governor, self.universe["skymap"])
Пример #7
0
 def testInstrumentDimensions(self):
     """Check the graph built from exposure + detector + visit."""
     g = DimensionGraph(self.universe, names=("exposure", "detector", "visit"))
     self.assertCountEqual(g.dimensions.names,
                           ["instrument", "exposure", "detector", "visit",
                            "physical_filter", "band", "visit_system"])
     self.assertCountEqual(g.required.names,
                           ["instrument", "exposure", "detector", "visit"])
     self.assertCountEqual(g.implied.names,
                           ["physical_filter", "band", "visit_system"])
     # Non-dimension elements (join tables) present in the graph.
     self.assertCountEqual(g.elements.names - g.dimensions.names,
                           ["visit_detector_region", "visit_definition"])
     self.assertCountEqual(g.governors.names, {"instrument"})
Пример #8
0
 def testWithoutFilter(self):
     """Round-trip a tract/patch data ID (no filter) through the packer."""
     dimensions = DimensionGraph(universe=self.universe, names=["tract", "patch"])
     dataId = DataCoordinate.standardize(
         skymap=self.fixed["skymap"], tract=2, patch=6, universe=self.universe)
     packer = SkyMapDimensionPacker(self.fixed, dimensions)
     packed = packer.pack(dataId)
     # The packed integer must fit in the advertised bit budget and
     # unpack back to the original data ID.
     self.assertLessEqual(packed.bit_length(), packer.maxBits)
     self.assertEqual(packer.unpack(packed), dataId)
Пример #9
0
 def setUp(self):
     # Fixed "skymap" data ID shared by the tests; the record values bound
     # the tract/patch ranges the packer must encode.
     self.universe = DimensionUniverse()
     skymapRecord = self.universe["skymap"].RecordClass.fromDict({
         "name": "unimportant",
         "tract_max": 5,
         "patch_nx_max": 3,
         "patch_ny_max": 3,
     })
     self.fixed = ExpandedDataCoordinate(
         DimensionGraph(universe=self.universe, names=["skymap"]),
         values=("unimportant", ),
         records={"skymap": skymapRecord})
Пример #10
0
    def makeDatasetRef(self, datasetTypeName, dataId=None, storageClassName="DefaultStorageClass",
                       run="run2", conform=True):
        """Make a simple DatasetRef"""
        if dataId is None:
            dataId = self.dataId

        # Pretend we have a parent if this looks like a composite
        _, componentName = DatasetType.splitDatasetTypeName(datasetTypeName)
        if componentName:
            parentStorageClass = DatasetType.PlaceholderParentStorageClass
        else:
            parentStorageClass = None

        dimensions = DimensionGraph(self.universe, names=dataId.keys())
        datasetType = DatasetType(datasetTypeName, dimensions,
                                  StorageClass(storageClassName),
                                  parentStorageClass=parentStorageClass)
        return DatasetRef(datasetType, dataId, id=1, run=run, conform=conform)
Пример #11
0
 def setUp(self):
     # Fixed "skymap" data ID shared by the tests; the record values bound
     # the tract/patch ranges the packer must encode.
     self.universe = DimensionUniverse()
     record = self.universe["skymap"].RecordClass(
         name="unimportant",
         tract_max=5,
         patch_nx_max=3,
         patch_ny_max=3,
     )
     graph = DimensionGraph(universe=self.universe, names=["skymap"])
     self.fixed = DataCoordinate.fromFullValues(
         graph,
         values=("unimportant", ),
     ).expanded(records={"skymap": record})
Пример #12
0
 def testObservationDimensions(self):
     """Check the graph built from exposure + detector + visit, including
     its spatial/temporal families and their governor.
     """
     g = DimensionGraph(self.universe, names=("exposure", "detector", "visit"))
     self.assertCountEqual(g.dimensions.names,
                           ["instrument", "detector", "visit", "exposure",
                            "physical_filter", "band", "visit_system"])
     self.assertCountEqual(g.required.names,
                           ["instrument", "detector", "exposure", "visit"])
     self.assertCountEqual(g.implied.names,
                           ["physical_filter", "band", "visit_system"])
     # Non-dimension elements (join tables) present in the graph.
     self.assertCountEqual(g.elements.names - g.dimensions.names,
                           ["visit_detector_region", "visit_definition"])
     self.assertCountEqual(g.spatial.names, ["observation_regions"])
     self.assertCountEqual(g.temporal.names, ["observation_timespans"])
     self.assertCountEqual(g.governors.names, {"instrument"})
     self.assertEqual(g.spatial.names, {"observation_regions"})
     self.assertEqual(g.temporal.names, {"observation_timespans"})
     self.assertEqual(next(iter(g.spatial)).governor, self.universe["instrument"])
     self.assertEqual(next(iter(g.temporal)).governor, self.universe["instrument"])
Пример #13
0
 def makeDatasetRef(self,
                    datasetTypeName,
                    dataId=None,
                    storageClassName="DefaultStorageClass",
                    conform=True):
     """Make a simple DatasetRef"""
     if dataId is None:
         dataId = self.dataId
     # Derive the dimensions from the data ID's keys so the two agree.
     graph = DimensionGraph(self.universe, names=dataId.keys())
     storageClass = StorageClass(storageClassName)
     datasetType = DatasetType(datasetTypeName, graph, storageClass)
     return DatasetRef(datasetType, dataId, id=1, run="run2", conform=conform)
Пример #14
0
    def testRegistryConfig(self):
        """Exercise formatter lookup driven by the registry config file."""
        configFile = os.path.join(TESTDIR, "config", "basic", "posixDatastore.yaml")
        config = Config(configFile)
        universe = DimensionUniverse()
        self.factory.registerFormatters(config["datastore", "formatters"], universe=universe)

        # Create a DatasetRef with and without instrument matching the
        # one in the config file.
        dimensions = universe.extract(("visit", "physical_filter", "instrument"))
        sc = StorageClass("DummySC", dict, None)
        refPviHsc = self.makeDatasetRef(
            "pvi", dimensions, sc,
            {"instrument": "DummyHSC", "physical_filter": "v"},
            conform=False)
        refPviHscFmt = self.factory.getFormatterClass(refPviHsc)
        self.assertIsFormatter(refPviHscFmt)
        self.assertIn("JsonFormatter", refPviHscFmt.name())

        refPviNotHsc = self.makeDatasetRef(
            "pvi", dimensions, sc,
            {"instrument": "DummyNotHSC", "physical_filter": "v"},
            conform=False)
        refPviNotHscFmt = self.factory.getFormatterClass(refPviNotHsc)
        self.assertIsFormatter(refPviNotHscFmt)
        self.assertIn("PickleFormatter", refPviNotHscFmt.name())

        # Create a DatasetRef that should fall back to using Dimensions
        refPvixHsc = self.makeDatasetRef(
            "pvix", dimensions, sc,
            {"instrument": "DummyHSC", "physical_filter": "v"},
            conform=False)
        refPvixNotHscFmt = self.factory.getFormatterClass(refPvixHsc)
        self.assertIsFormatter(refPvixNotHscFmt)
        self.assertIn("PickleFormatter", refPvixNotHscFmt.name())

        # Create a DatasetRef that should fall back to using StorageClass
        dimensionsNoV = DimensionGraph(universe, names=("physical_filter", "instrument"))
        refPvixNotHscDims = self.makeDatasetRef(
            "pvix", dimensionsNoV, sc,
            {"instrument": "DummyHSC", "physical_filter": "v"},
            conform=False)
        refPvixNotHscDims_fmt = self.factory.getFormatterClass(refPvixNotHscDims)
        self.assertIsFormatter(refPvixNotHscDims_fmt)
        self.assertIn("YamlFormatter", refPvixNotHscDims_fmt.name())
Пример #15
0
    def testRegistryConfig(self):
        """Exercise formatter lookup and write-parameter handling driven by
        the registry config file.
        """
        configFile = os.path.join(TESTDIR, "config", "basic",
                                  "posixDatastore.yaml")
        config = Config(configFile)
        universe = DimensionUniverse()
        self.factory.registerFormatters(config["datastore", "formatters"],
                                        universe=universe)

        # Create a DatasetRef with and without instrument matching the
        # one in the config file.
        dimensions = universe.extract(("visit", "physical_filter", "instrument"))
        sc = StorageClass("DummySC", dict, None)
        refPviHsc = self.makeDatasetRef(
            "pvi", dimensions, sc,
            {"instrument": "DummyHSC", "physical_filter": "v"},
            conform=False)
        refPviHscFmt = self.factory.getFormatterClass(refPviHsc)
        self.assertIsFormatter(refPviHscFmt)
        self.assertIn("JsonFormatter", refPviHscFmt.name())

        refPviNotHsc = self.makeDatasetRef(
            "pvi", dimensions, sc,
            {"instrument": "DummyNotHSC", "physical_filter": "v"},
            conform=False)
        refPviNotHscFmt = self.factory.getFormatterClass(refPviNotHsc)
        self.assertIsFormatter(refPviNotHscFmt)
        self.assertIn("PickleFormatter", refPviNotHscFmt.name())

        # Create a DatasetRef that should fall back to using Dimensions
        refPvixHsc = self.makeDatasetRef(
            "pvix", dimensions, sc,
            {"instrument": "DummyHSC", "physical_filter": "v"},
            conform=False)
        refPvixNotHscFmt = self.factory.getFormatterClass(refPvixHsc)
        self.assertIsFormatter(refPvixNotHscFmt)
        self.assertIn("PickleFormatter", refPvixNotHscFmt.name())

        # Create a DatasetRef that should fall back to using StorageClass
        dimensionsNoV = DimensionGraph(universe, names=("physical_filter", "instrument"))
        refPvixNotHscDims = self.makeDatasetRef(
            "pvix", dimensionsNoV, sc,
            {"instrument": "DummyHSC", "physical_filter": "v"},
            conform=False)
        refPvixNotHscDims_fmt = self.factory.getFormatterClass(refPvixNotHscDims)
        self.assertIsFormatter(refPvixNotHscDims_fmt)
        self.assertIn("YamlFormatter", refPvixNotHscDims_fmt.name())

        # Check that parameters are stored
        refParam = self.makeDatasetRef(
            "paramtest", dimensions, sc,
            {"instrument": "DummyNotHSC", "physical_filter": "v"},
            conform=False)
        lookup, refParam_fmt, kwargs = self.factory.getFormatterClassWithMatch(refParam)
        self.assertIn("writeParameters", kwargs)
        expected = {"max": 5, "min": 2, "comment": "Additional commentary", "recipe": "recipe1"}
        self.assertEqual(kwargs["writeParameters"], expected)
        self.assertIn("FormatterTest", refParam_fmt.name())

        f = self.factory.getFormatter(refParam, self.fileDescriptor)
        self.assertEqual(f.writeParameters, expected)

        # Overrides supplied at getFormatter time are merged with the
        # configured defaults.
        f = self.factory.getFormatter(refParam, self.fileDescriptor,
                                      writeParameters={"min": 22, "extra": 50})
        self.assertEqual(f.writeParameters,
                         {"max": 5, "min": 22, "comment": "Additional commentary",
                          "extra": 50, "recipe": "recipe1"})

        self.assertIn("recipe1", f.writeRecipes)
        self.assertEqual(f.writeParameters["recipe"], "recipe1")

        with self.assertRaises(ValueError):
            # "new" is not allowed as a write parameter
            self.factory.getFormatter(refParam, self.fileDescriptor,
                                      writeParameters={"new": 1})

        with self.assertRaises(RuntimeError):
            # "mode" is a required recipe parameter
            self.factory.getFormatter(refParam, self.fileDescriptor,
                                      writeRecipes={"recipe3": {"notmode": 1}})
Пример #16
0
class _TaskScaffolding:
    """Helper class aggregating information about a `PipelineTask`, used when
    constructing a `QuantumGraph`.

    See `_PipelineScaffolding` for a top-down description of the full
    scaffolding data structure.

    Parameters
    ----------
    taskDef : `TaskDef`
        Data structure that identifies the task class and its config.
    parent : `_PipelineScaffolding`
        The parent data structure that will hold the instance being
        constructed.
    datasetTypes : `TaskDatasetTypes`
        Data structure that categorizes the dataset types used by this task.
    """
    def __init__(self, taskDef: TaskDef, parent: _PipelineScaffolding,
                 datasetTypes: TaskDatasetTypes):
        universe = parent.dimensions.universe
        self.taskDef = taskDef
        self.dimensions = DimensionGraph(universe,
                                         names=taskDef.connections.dimensions)
        # A task may only use dimensions already present in the pipeline.
        assert self.dimensions.issubset(parent.dimensions)
        # Initialize _DatasetDicts as subsets of the one or two
        # corresponding dicts in the parent _PipelineScaffolding.
        self.initInputs = _DatasetDict.fromSubset(datasetTypes.initInputs,
                                                  parent.initInputs,
                                                  parent.initIntermediates)
        self.initOutputs = _DatasetDict.fromSubset(datasetTypes.initOutputs,
                                                   parent.initIntermediates,
                                                   parent.initOutputs)
        self.inputs = _DatasetDict.fromSubset(datasetTypes.inputs,
                                              parent.inputs,
                                              parent.intermediates)
        self.outputs = _DatasetDict.fromSubset(datasetTypes.outputs,
                                               parent.intermediates,
                                               parent.outputs)
        self.prerequisites = _DatasetDict.fromSubset(
            datasetTypes.prerequisites, parent.prerequisites)
        self.dataIds = set()
        self.quanta = {}

    def __repr__(self):
        # Default dataclass-injected __repr__ gets caught in an infinite loop
        # because of back-references.
        return f"_TaskScaffolding(taskDef={self.taskDef}, ...)"

    taskDef: TaskDef
    """Data structure that identifies the task class and its config
    (`TaskDef`).
    """

    dimensions: DimensionGraph
    """The dimensions of a single `Quantum` of this task (`DimensionGraph`).
    """

    initInputs: _DatasetDict
    """Dictionary containing information about datasets used to construct this
    task (`_DatasetDict`).
    """

    initOutputs: _DatasetDict
    """Dictionary containing information about datasets produced as a
    side-effect of constructing this task (`_DatasetDict`).
    """

    inputs: _DatasetDict
    """Dictionary containing information about datasets used as regular,
    graph-constraining inputs to this task (`_DatasetDict`).
    """

    outputs: _DatasetDict
    """Dictionary containing information about datasets produced by this task
    (`_DatasetDict`).
    """

    prerequisites: _DatasetDict
    """Dictionary containing information about input datasets that must be
    present in the repository before any Pipeline containing this task is run
    (`_DatasetDict`).
    """

    dataIds: Set[DataCoordinate]
    """Data IDs for all quanta of this task (`set` of `DataCoordinate`).
    """

    quanta: Dict[DataCoordinate, _QuantumScaffolding]
    """Dictionary mapping data ID to a scaffolding object for the Quantum of
    this task with that data ID.
    """

    def makeQuantumSet(self) -> Set[Quantum]:
        """Create a `set` of `Quantum` from the information in ``self``.

        Returns
        -------
        nodes : `set` of `Quantum`
            The `Quantum` elements corresponding to this task.
        """
        return {q.makeQuantum() for q in self.quanta.values()}