Exemplo n.º 1
0
 def testMatplotlibFormatter(self):
     butler = Butler(self.root, run="testrun")
     datasetType = DatasetType("test_plot", [],
                               "Plot",
                               universe=butler.registry.dimensions)
     butler.registry.registerDatasetType(datasetType)
     # Does not have to be a random image
     pyplot.imshow([
         self.rng.sample(range(50), 10),
         self.rng.sample(range(50), 10),
         self.rng.sample(range(50), 10),
     ])
     ref = butler.put(pyplot.gcf(), datasetType)
     uri = butler.getURI(ref)
     # The test after this will not work if we don't have local file
     self.assertEqual(uri.scheme, "file", "Testing returned URI: {uri}")
     with tempfile.NamedTemporaryFile(suffix=".png") as file:
         pyplot.gcf().savefig(file.name)
         self.assertTrue(filecmp.cmp(uri.path, file.name, shallow=True))
     self.assertTrue(butler.datasetExists(ref))
     with self.assertRaises(ValueError):
         butler.get(ref)
     butler.pruneDatasets([ref], unstore=True, purge=True)
     with self.assertRaises(LookupError):
         butler.datasetExists(ref)
Exemplo n.º 2
0
    def testMatplotlibFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("test_plot", [], "Plot",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        # Does not have to be a random image
        pyplot.imshow([self.rng.sample(range(50), 10),
                       self.rng.sample(range(50), 10),
                       self.rng.sample(range(50), 10),
                       ])
        ref = butler.put(pyplot.gcf(), datasetType)
        uri = butler.getURI(ref)

        # Following test needs a local file
        with uri.as_local() as local:
            with tempfile.NamedTemporaryFile(suffix=".png") as file:
                pyplot.gcf().savefig(file.name)
                self.assertTrue(
                    filecmp.cmp(
                        local.ospath,
                        file.name,
                        shallow=True
                    )
                )
        self.assertTrue(butler.datasetExists(ref))
        with self.assertRaises(ValueError):
            butler.get(ref)
        butler.pruneDatasets([ref], unstore=True, purge=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref)
Exemplo n.º 3
0
 def testImportExport(self):
     # Run put/get tests just to create and populate a repo.
     storageClass = self.storageClassFactory.getStorageClass(
         "StructuredDataNoComponents")
     exportButler = self.runPutGetTest(storageClass, "test_metric")
     # Test that the repo actually has at least one dataset.
     datasets = list(
         exportButler.registry.queryDatasets(..., collections=...))
     self.assertGreater(len(datasets), 0)
     # Export those datasets.  We used TemporaryDirectory because there
     # doesn't seem to be a way to get the filename (as opposed to the file
     # object) from any of tempfile's temporary-file context managers.
     with tempfile.TemporaryDirectory() as exportDir:
         # TODO: When PosixDatastore supports transfer-on-exist, add tests
         # for that.
         exportFile = os.path.join(exportDir, "exports.yaml")
         with exportButler.export(filename=exportFile) as export:
             export.saveDatasets(datasets)
         self.assertTrue(os.path.exists(exportFile))
         with tempfile.TemporaryDirectory() as importDir:
             Butler.makeRepo(importDir, config=Config(self.configFile))
             importButler = Butler(importDir, run="ingest")
             importButler.import_(filename=exportFile,
                                  directory=exportButler.datastore.root,
                                  transfer="symlink")
             for ref in datasets:
                 with self.subTest(ref=ref):
                     # Test for existence by passing in the DatasetType and
                     # data ID separately, to avoid lookup by dataset_id.
                     self.assertTrue(
                         importButler.datasetExists(ref.datasetType,
                                                    ref.dataId))
Exemplo n.º 4
0
 def testMatplotlibFormatter(self):
     butler = Butler(self.root, run="testrun")
     datasetType = DatasetType("test_plot", [],
                               "Plot",
                               universe=butler.registry.dimensions)
     butler.registry.registerDatasetType(datasetType)
     pyplot.imshow(np.random.randn(3, 4))
     ref = butler.put(pyplot.gcf(), datasetType)
     parsed = urllib.parse.urlparse(butler.getUri(ref))
     with tempfile.NamedTemporaryFile(suffix=".png") as file:
         pyplot.gcf().savefig(file.name)
         self.assertTrue(filecmp.cmp(parsed.path, file.name, shallow=True))
     self.assertTrue(butler.datasetExists(ref))
     with self.assertRaises(ValueError):
         butler.get(ref)
     butler.remove(ref)
     with self.assertRaises(LookupError):
         butler.datasetExists(ref)
Exemplo n.º 5
0
    def runPutGetTest(self, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile)

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                          storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.addDimensionEntry("instrument",
                                          {"instrument": "DummyCamComp"})
        butler.registry.addDimensionEntry("physical_filter", {
            "instrument": "DummyCamComp",
            "physical_filter": "d-r"
        })
        butler.registry.addDimensionEntry("visit", {
            "instrument": "DummyCamComp",
            "visit": 423,
            "physical_filter": "d-r"
        })

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn, ), (datasetTypeName, dataId), (datasetType,
                                                            dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"),
                                             metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {
            "ingest",
        })
Exemplo n.º 6
0
    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass(
            "StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                          storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument",
                                            {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {
            "instrument": "DummyCamComp",
            "name": "d-r",
            "abstract_filter": "R"
        })
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {
                    "instrument": "DummyCamComp",
                    "id": detector,
                    "full_name": f"detector{detector}"
                })

        butler.registry.insertDimensionData(
            "visit", {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r"
            }, {
                "instrument": "DummyCamComp",
                "id": 424,
                "name": "fourtwentyfour",
                "physical_filter": "d-r"
            })

        formatter = doImport(
            "lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {
                "instrument": "DummyCamComp",
                "visit": 423,
                "detector": detector
            }
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(
                FileDataset(path=metricFile, refs=[refIn],
                            formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {
                "instrument": "DummyCamComp",
                "visit": 424,
                "detector": detector
            }
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(
            FileDataset(path=metricFile,
                        refs=refs,
                        formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.remove(datasetTypeName, dataId1)
        with self.assertRaises(LookupError):
            butler.datasetExists(datasetTypeName, dataId1)
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)
Exemplo n.º 7
0
    def runPutGetTest(self, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                          storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument",
                                            {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {
            "instrument": "DummyCamComp",
            "name": "d-r",
            "abstract_filter": "R"
        })
        butler.registry.insertDimensionData(
            "visit", {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r"
            })

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn, ), (datasetTypeName, dataId), (datasetType,
                                                            dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name,
                                               dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"),
                                             metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={
            "slice": slice(stop)
        }).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName,
                                                      "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            butler.remove(compNameS, dataId, remember=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(
                ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {
            "ingest",
        })

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.remove(ref.datasetType.name, dataId)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions,
            self.storageClassFactory.getStorageClass("StructuredData"),
            butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument",
                                            {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {
            "instrument": "DummyCamComp",
            "name": "d-r",
            "abstract_filter": "R"
        })
        butler.registry.insertDimensionData(
            "visit", {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r"
            })
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(
            butler.datasetExists(datasetType, dataId, collection=run))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType,
                                            dataId,
                                            collection=run))
        self.assertEqual(
            metric,
            butler.getDeferred(datasetType, dataId, collection=run).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.remove(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection but without a Datastore entry.
        butler.remove(datasetType, dataId, collection="tagged")
        self.assertFalse(
            butler.datasetExists(datasetType, dataId, collection=run))