def testMatplotlibFormatter(self):
    """Round-trip a matplotlib figure through the butler as a "Plot"
    dataset.

    Puts the current pyplot figure, compares the stored file against a
    freshly saved copy, then prunes the dataset and verifies it is gone.
    """
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("test_plot", [], "Plot",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    # Does not have to be a random image
    pyplot.imshow([self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   ])
    ref = butler.put(pyplot.gcf(), datasetType)
    uri = butler.getURI(ref)
    # The test after this will not work if we don't have a local file.
    # BUG FIX: the failure message was a plain string containing a
    # literal "{uri}"; it needs the f-prefix to interpolate the URI.
    self.assertEqual(uri.scheme, "file", f"Testing returned URI: {uri}")
    with tempfile.NamedTemporaryFile(suffix=".png") as file:
        pyplot.gcf().savefig(file.name)
        # Byte-for-byte comparison of the datastore file with a fresh
        # save of the same figure.
        self.assertTrue(filecmp.cmp(uri.path, file.name, shallow=True))
    self.assertTrue(butler.datasetExists(ref))
    # Reading the figure back is expected to raise ValueError —
    # presumably the Plot formatter is write-only; confirm upstream.
    with self.assertRaises(ValueError):
        butler.get(ref)
    butler.pruneDatasets([ref], unstore=True, purge=True)
    with self.assertRaises(LookupError):
        butler.datasetExists(ref)
def testMatplotlibFormatter(self):
    """Store a pyplot figure as a "Plot" dataset, verify the persisted
    bytes, then prune the dataset and confirm it is gone."""
    repo = Butler(self.root, run="testrun")
    plotType = DatasetType("test_plot", [], "Plot",
                           universe=repo.registry.dimensions)
    repo.registry.registerDatasetType(plotType)
    # Any image content works here; randomness is irrelevant.
    rows = [self.rng.sample(range(50), 10) for _ in range(3)]
    pyplot.imshow(rows)
    ref = repo.put(pyplot.gcf(), plotType)
    uri = repo.getURI(ref)
    # The byte comparison below requires a local copy of the stored file.
    with uri.as_local() as localCopy, \
            tempfile.NamedTemporaryFile(suffix=".png") as reference:
        pyplot.gcf().savefig(reference.name)
        self.assertTrue(
            filecmp.cmp(localCopy.ospath, reference.name, shallow=True))
    self.assertTrue(repo.datasetExists(ref))
    # Reading the figure back is expected to fail with ValueError.
    with self.assertRaises(ValueError):
        repo.get(ref)
    repo.pruneDatasets([ref], unstore=True, purge=True)
    with self.assertRaises(LookupError):
        repo.datasetExists(ref)
def testImportExport(self):
    """Export every dataset from one repository and import the export
    into a second repository, checking each dataset is findable there.
    """
    # Populate a repository by reusing the put/get test.
    noComponentsClass = self.storageClassFactory.getStorageClass(
        "StructuredDataNoComponents")
    exportButler = self.runPutGetTest(noComponentsClass, "test_metric")
    # The source repo must hold at least one dataset for this test to
    # be meaningful.
    exported = list(
        exportButler.registry.queryDatasets(..., collections=...))
    self.assertGreater(len(exported), 0)
    # A TemporaryDirectory is used because tempfile's temporary-file
    # context managers expose file objects, not plain filenames, and a
    # filename is what export()/import_() need.
    with tempfile.TemporaryDirectory() as outDir:
        # TODO: When PosixDatastore supports transfer-on-exist, add tests
        # for that.
        exportFile = os.path.join(outDir, "exports.yaml")
        with exportButler.export(filename=exportFile) as export:
            export.saveDatasets(exported)
        self.assertTrue(os.path.exists(exportFile))
        with tempfile.TemporaryDirectory() as newRepoDir:
            Butler.makeRepo(newRepoDir, config=Config(self.configFile))
            importButler = Butler(newRepoDir, run="ingest")
            importButler.import_(filename=exportFile,
                                 directory=exportButler.datastore.root,
                                 transfer="symlink")
            for ref in exported:
                with self.subTest(ref=ref):
                    # Check existence via DatasetType plus data ID,
                    # deliberately avoiding a dataset_id lookup.
                    self.assertTrue(
                        importButler.datasetExists(ref.datasetType,
                                                   ref.dataId))
def testMatplotlibFormatter(self):
    """Persist a pyplot figure through the butler, verify the written
    file matches a fresh save, then remove it and confirm lookup fails.
    """
    repo = Butler(self.root, run="testrun")
    plotType = DatasetType("test_plot", [], "Plot",
                           universe=repo.registry.dimensions)
    repo.registry.registerDatasetType(plotType)
    pyplot.imshow(np.random.randn(3, 4))
    ref = repo.put(pyplot.gcf(), plotType)
    # Extract the filesystem path from the returned URI string.
    parts = urllib.parse.urlparse(repo.getUri(ref))
    with tempfile.NamedTemporaryFile(suffix=".png") as reference:
        pyplot.gcf().savefig(reference.name)
        self.assertTrue(
            filecmp.cmp(parts.path, reference.name, shallow=True))
    self.assertTrue(repo.datasetExists(ref))
    # Reading the figure back is expected to fail with ValueError.
    with self.assertRaises(ValueError):
        repo.get(ref)
    repo.remove(ref)
    with self.assertRaises(LookupError):
        repo.datasetExists(ref)
def runPutGetTest(self, storageClass, datasetTypeName):
    """Exercise the put/get/remove dataset lifecycle for one storage class.

    Registers a dataset type named ``datasetTypeName`` with the given
    ``storageClass``, inserts the dimension entries it needs, then puts,
    reads back, and removes an example metric using every supported
    argument form (DatasetRef, name + dataId, DatasetType + dataId).

    Parameters
    ----------
    storageClass
        Storage class for the new dataset type; if composite, component
        retrieval is also checked.
    datasetTypeName : `str`
        Name under which the dataset type is registered.
    """
    # NOTE(review): no run is passed to Butler here, yet the final
    # assertion expects an "ingest" collection — presumably the test
    # config supplies the run; confirm against self.tmpConfigFile.
    butler = Butler(self.tmpConfigFile)
    # There will not be a collection yet
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, set())
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(
        ["instrument", "visit"])
    datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                      storageClass, butler.registry)
    # Add needed Dimensions
    butler.registry.addDimensionEntry("instrument",
                                      {"instrument": "DummyCamComp"})
    butler.registry.addDimensionEntry("physical_filter", {
        "instrument": "DummyCamComp",
        "physical_filter": "d-r"
    })
    butler.registry.addDimensionEntry("visit", {
        "instrument": "DummyCamComp",
        "visit": 423,
        "physical_filter": "d-r"
    })
    # Create and store a dataset
    metric = makeExampleMetrics()
    dataId = {"instrument": "DummyCamComp", "visit": 423}
    # Create a DatasetRef for put
    refIn = DatasetRef(datasetType, dataId, id=None)
    # Put with a preexisting id should fail
    with self.assertRaises(ValueError):
        butler.put(metric, DatasetRef(datasetType, dataId, id=100))
    # Put and remove the dataset once as a DatasetRef, once as a dataId,
    # and once with a DatasetType
    for args in ((refIn, ), (datasetTypeName, dataId),
                 (datasetType, dataId)):
        with self.subTest(args=args):
            ref = butler.put(metric, *args)
            self.assertIsInstance(ref, DatasetRef)
            # Test getDirect
            metricOut = butler.getDirect(ref)
            self.assertEqual(metric, metricOut)
            # Test get
            metricOut = butler.get(ref.datasetType.name, dataId)
            self.assertEqual(metric, metricOut)
            # Test get with a datasetRef
            metricOut = butler.get(ref)
            self.assertEqual(metric, metricOut)
            # Check we can get components
            if storageClass.isComposite():
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"),
                                         metric)
            # Remove from collection only; after that we shouldn't be able
            # to find it unless we use the dataset_id.
            butler.remove(*args, delete=False)
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # If we use the output ref with the dataset_id, we should
            # still be able to load it with getDirect().
            self.assertEqual(metric, butler.getDirect(ref))
            # Reinsert into collection, then delete from Datastore *and*
            # remove from collection.
            butler.registry.associate(butler.collection, [ref])
            butler.remove(*args)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # Now getDirect() should fail, too.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry still knows about it, if we use the dataset_id.
            self.assertEqual(butler.registry.getDataset(ref.id), ref)
            # Put again, then remove completely (this generates a new
            # dataset record in registry, with a new ID - the old one
            # still exists but it is not in any collection so we don't
            # care).
            ref = butler.put(metric, *args)
            butler.remove(*args, remember=False)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))
    # Put the dataset again, since the last thing we did was remove it.
    ref = butler.put(metric, refIn)
    # Get with parameters: "slice" truncates the metric's data sequence
    # while leaving summary and output untouched.
    stop = 4
    sliced = butler.get(ref, parameters={"slice": slice(stop)})
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    # Combining a DatasetRef with a dataId should fail
    with self.assertRaises(ValueError):
        butler.get(ref, dataId)
    # Getting with an explicit ref should fail if the id doesn't match
    with self.assertRaises(ValueError):
        butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))
    # Getting a dataset with unknown parameters should fail
    with self.assertRaises(KeyError):
        butler.get(ref, parameters={"unsupported": True})
    # Check we have a collection
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, {
        "ingest",
    })
def testIngest(self):
    """Test file ingest into the butler.

    Covers two layouts: one file per dataset (one YAML file per
    detector for visit 423) and several datasets sharing a single file
    (both detectors of visit 424 read from one file via
    MultiDetectorFormatter), then checks that removing one shared-file
    dataset leaves the other readable.
    """
    butler = Butler(self.tmpConfigFile, run="ingest")
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(
        ["instrument", "visit", "detector"])
    storageClass = self.storageClassFactory.getStorageClass(
        "StructuredDataDictYaml")
    datasetTypeName = "metric"
    datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                      storageClass, butler.registry)
    # Add needed Dimensions
    butler.registry.insertDimensionData("instrument",
                                        {"name": "DummyCamComp"})
    butler.registry.insertDimensionData("physical_filter", {
        "instrument": "DummyCamComp",
        "name": "d-r",
        "abstract_filter": "R"
    })
    for detector in (1, 2):
        butler.registry.insertDimensionData(
            "detector", {
                "instrument": "DummyCamComp",
                "id": detector,
                "full_name": f"detector{detector}"
            })
    butler.registry.insertDimensionData(
        "visit", {
            "instrument": "DummyCamComp",
            "id": 423,
            "name": "fourtwentythree",
            "physical_filter": "d-r"
        }, {
            "instrument": "DummyCamComp",
            "id": 424,
            "name": "fourtwentyfour",
            "physical_filter": "d-r"
        })
    formatter = doImport(
        "lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
    dataRoot = os.path.join(TESTDIR, "data", "basic")
    # One file per dataset: each detector has its own YAML file.
    datasets = []
    for detector in (1, 2):
        detector_name = f"detector_{detector}"
        metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
        dataId = {
            "instrument": "DummyCamComp",
            "visit": 423,
            "detector": detector
        }
        # Create a DatasetRef for ingest
        refIn = DatasetRef(datasetType, dataId, id=None)
        datasets.append(
            FileDataset(path=metricFile, refs=[refIn],
                        formatter=formatter))
    butler.ingest(*datasets, transfer="copy")
    dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
    dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}
    metrics1 = butler.get(datasetTypeName, dataId1)
    metrics2 = butler.get(datasetTypeName, dataId2)
    self.assertNotEqual(metrics1, metrics2)
    # Separate files must yield separate URIs.
    uri1 = butler.getUri(datasetTypeName, dataId1)
    uri2 = butler.getUri(datasetTypeName, dataId2)
    self.assertNotEqual(uri1, uri2)
    # Now do a multi-dataset but single file ingest
    metricFile = os.path.join(dataRoot, "detectors.yaml")
    refs = []
    for detector in (1, 2):
        # FIX: dropped the unused `detector_name` local that the
        # original computed here; the shared file path is fixed above.
        dataId = {
            "instrument": "DummyCamComp",
            "visit": 424,
            "detector": detector
        }
        # Create a DatasetRef for ingest
        refs.append(DatasetRef(datasetType, dataId, id=None))
    datasets = [
        FileDataset(path=metricFile, refs=refs,
                    formatter=MultiDetectorFormatter)
    ]
    butler.ingest(*datasets, transfer="copy")
    dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
    dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}
    multi1 = butler.get(datasetTypeName, dataId1)
    multi2 = butler.get(datasetTypeName, dataId2)
    self.assertEqual(multi1, metrics1)
    self.assertEqual(multi2, metrics2)
    # Both datasets come from the same file, so the URIs must match.
    uri1 = butler.getUri(datasetTypeName, dataId1)
    uri2 = butler.getUri(datasetTypeName, dataId2)
    self.assertEqual(uri1, uri2)
    # Test that removing one does not break the second
    butler.remove(datasetTypeName, dataId1)
    with self.assertRaises(LookupError):
        butler.datasetExists(datasetTypeName, dataId1)
    self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
    multi2b = butler.get(datasetTypeName, dataId2)
    self.assertEqual(multi2, multi2b)
def runPutGetTest(self, storageClass, datasetTypeName):
    """Exercise the put/get/remove dataset lifecycle for one storage class.

    Registers a dataset type named ``datasetTypeName`` with the given
    ``storageClass``, inserts the dimension records it needs, then puts,
    reads back (including deferred handles), and removes an example
    metric using every supported argument form. Returns the populated
    butler for reuse by downstream tests.

    Parameters
    ----------
    storageClass
        Storage class for the new dataset type; if composite, component
        retrieval and per-component removal are also checked.
    datasetTypeName : `str`
        Name under which the dataset type is registered.

    Returns
    -------
    butler
        The butler with one dataset re-inserted at the end.
    """
    butler = Butler(self.tmpConfigFile, run="ingest")
    # There will not be a collection yet
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, set())
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(
        ["instrument", "visit"])
    datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                      storageClass, butler.registry)
    # Add needed Dimensions
    butler.registry.insertDimensionData("instrument",
                                        {"name": "DummyCamComp"})
    butler.registry.insertDimensionData("physical_filter", {
        "instrument": "DummyCamComp",
        "name": "d-r",
        "abstract_filter": "R"
    })
    butler.registry.insertDimensionData(
        "visit", {
            "instrument": "DummyCamComp",
            "id": 423,
            "name": "fourtwentythree",
            "physical_filter": "d-r"
        })
    # Create and store a dataset
    metric = makeExampleMetrics()
    dataId = {"instrument": "DummyCamComp", "visit": 423}
    # Create a DatasetRef for put
    refIn = DatasetRef(datasetType, dataId, id=None)
    # Put with a preexisting id should fail
    with self.assertRaises(ValueError):
        butler.put(metric, DatasetRef(datasetType, dataId, id=100))
    # Put and remove the dataset once as a DatasetRef, once as a dataId,
    # and once with a DatasetType
    for args in ((refIn, ), (datasetTypeName, dataId),
                 (datasetType, dataId)):
        with self.subTest(args=args):
            ref = butler.put(metric, *args)
            self.assertIsInstance(ref, DatasetRef)
            # Test getDirect
            metricOut = butler.getDirect(ref)
            self.assertEqual(metric, metricOut)
            # Test get
            metricOut = butler.get(ref.datasetType.name, dataId)
            self.assertEqual(metric, metricOut)
            # Test get with a datasetRef
            metricOut = butler.get(ref)
            self.assertEqual(metric, metricOut)
            # Test getDeferred with dataId
            metricOut = butler.getDeferred(ref.datasetType.name,
                                           dataId).get()
            self.assertEqual(metric, metricOut)
            # Test getDeferred with a datasetRef
            metricOut = butler.getDeferred(ref).get()
            self.assertEqual(metric, metricOut)
            # Check we can get components
            if storageClass.isComposite():
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"),
                                         metric)
            # Remove from collection only; after that we shouldn't be able
            # to find it unless we use the dataset_id.
            butler.remove(*args, delete=False)
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # If we use the output ref with the dataset_id, we should
            # still be able to load it with getDirect().
            self.assertEqual(metric, butler.getDirect(ref))
            # Reinsert into collection, then delete from Datastore *and*
            # remove from collection.
            butler.registry.associate(butler.collection, [ref])
            butler.remove(*args)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # Now getDirect() should fail, too.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry still knows about it, if we use the dataset_id.
            self.assertEqual(butler.registry.getDataset(ref.id), ref)
            # Put again, then remove completely (this generates a new
            # dataset record in registry, with a new ID - the old one
            # still exists but it is not in any collection so we don't
            # care).
            ref = butler.put(metric, *args)
            butler.remove(*args, remember=False)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))
    # Put the dataset again, since the last thing we did was remove it.
    ref = butler.put(metric, refIn)
    # Get with parameters: "slice" truncates the metric's data sequence
    # while leaving summary and output untouched.
    stop = 4
    sliced = butler.get(ref, parameters={"slice": slice(stop)})
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    # getDeferred with parameters
    sliced = butler.getDeferred(ref, parameters={
        "slice": slice(stop)
    }).get()
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    # getDeferred with deferred parameters
    sliced = butler.getDeferred(ref).get(
        parameters={"slice": slice(stop)})
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    if storageClass.isComposite():
        # Delete one component and check that the other components
        # can still be retrieved
        metricOut = butler.get(ref.datasetType.name, dataId)
        compNameS = DatasetType.nameWithComponent(datasetTypeName,
                                                  "summary")
        compNameD = DatasetType.nameWithComponent(datasetTypeName,
                                                  "data")
        summary = butler.get(compNameS, dataId)
        self.assertEqual(summary, metric.summary)
        self.assertTrue(butler.datastore.exists(
            ref.components["summary"]))
        butler.remove(compNameS, dataId, remember=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(compNameS, dataId)
        self.assertFalse(butler.datastore.exists(
            ref.components["summary"]))
        self.assertTrue(butler.datastore.exists(ref.components["data"]))
        data = butler.get(compNameD, dataId)
        self.assertEqual(data, metric.data)
    # Combining a DatasetRef with a dataId should fail
    with self.assertRaises(ValueError):
        butler.get(ref, dataId)
    # Getting with an explicit ref should fail if the id doesn't match
    with self.assertRaises(ValueError):
        butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))
    # Getting a dataset with unknown parameters should fail
    with self.assertRaises(KeyError):
        butler.get(ref, parameters={"unsupported": True})
    # Check we have a collection
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, {
        "ingest",
    })
    # Clean up to check that we can remove something that may have
    # already had a component removed
    butler.remove(ref.datasetType.name, dataId)
    # Add a dataset back in since some downstream tests require
    # something to be present
    ref = butler.put(metric, refIn)
    return butler
    # NOTE(review): everything below is unreachable after the return
    # above. It reads like the body of a separate test method (put with
    # an explicit run on a writeable, run-less butler) whose "def" line
    # is missing from this chunk — restore the missing header before
    # relying on this code.
    # Construct a butler with no run or collection, but make it writeable.
    butler = Butler(self.tmpConfigFile, writeable=True)
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(
        ["instrument", "visit"])
    datasetType = self.addDatasetType(
        "example", dimensions,
        self.storageClassFactory.getStorageClass("StructuredData"),
        butler.registry)
    # Add needed Dimensions
    butler.registry.insertDimensionData("instrument",
                                        {"name": "DummyCamComp"})
    butler.registry.insertDimensionData("physical_filter", {
        "instrument": "DummyCamComp",
        "name": "d-r",
        "abstract_filter": "R"
    })
    butler.registry.insertDimensionData(
        "visit", {
            "instrument": "DummyCamComp",
            "id": 423,
            "name": "fourtwentythree",
            "physical_filter": "d-r"
        })
    dataId = {"instrument": "DummyCamComp", "visit": 423}
    # Create dataset.
    metric = makeExampleMetrics()
    # Register a new run and put dataset.
    run = "deferred"
    butler.registry.registerRun(run)
    ref = butler.put(metric, datasetType, dataId, run=run)
    # Putting with no run should fail with TypeError.
    with self.assertRaises(TypeError):
        butler.put(metric, datasetType, dataId)
    # Dataset should exist.
    self.assertTrue(
        butler.datasetExists(datasetType, dataId, collection=run))
    # We should be able to get the dataset back, but with and without
    # a deferred dataset handle.
    self.assertEqual(metric,
                     butler.get(datasetType, dataId, collection=run))
    self.assertEqual(
        metric,
        butler.getDeferred(datasetType, dataId, collection=run).get())
    # Trying to find the dataset without any collection is a TypeError.
    with self.assertRaises(TypeError):
        butler.datasetExists(datasetType, dataId)
    with self.assertRaises(TypeError):
        butler.get(datasetType, dataId)
    with self.assertRaises(TypeError):
        butler.remove(datasetType, dataId)
    # Associate the dataset with a different collection.
    butler.registry.associate("tagged", [ref])
    # Deleting the dataset from the new collection should make it findable
    # in the original collection but without a Datastore entry.
    butler.remove(datasetType, dataId, collection="tagged")
    self.assertFalse(
        butler.datasetExists(datasetType, dataId, collection=run))