Example no. 1
def testImportExport(self):
    # Run put/get tests just to create and populate a repo.
    storageClass = self.storageClassFactory.getStorageClass(
        "StructuredDataNoComponents")
    exportButler = self.runPutGetTest(storageClass, "test_metric")
    # Test that the repo actually has at least one dataset.
    datasets = list(
        exportButler.registry.queryDatasets(..., collections=...))
    self.assertGreater(len(datasets), 0)
    # Export those datasets.  We use TemporaryDirectory because there
    # doesn't seem to be a way to get the filename (as opposed to the
    # file object) from any of tempfile's temporary-file context
    # managers.
    with tempfile.TemporaryDirectory() as exportDir:
        # TODO: When PosixDatastore supports transfer-on-export, add
        # tests for that.
        exportFile = os.path.join(exportDir, "exports.yaml")
        with exportButler.export(filename=exportFile) as export:
            export.saveDatasets(datasets)
        self.assertTrue(os.path.exists(exportFile))
        with tempfile.TemporaryDirectory() as importDir:
            Butler.makeRepo(importDir, config=Config(self.configFile))
            importButler = Butler(importDir, run="ingest")
            importButler.import_(filename=exportFile,
                                 directory=exportButler.datastore.root,
                                 transfer="symlink")
            for ref in datasets:
                with self.subTest(ref=ref):
                    # Test for existence by passing in the DatasetType
                    # and data ID separately, to avoid lookup by
                    # dataset_id.
                    self.assertTrue(
                        importButler.datasetExists(ref.datasetType,
                                                   ref.dataId))
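Stripped of the test scaffolding, the export/import round trip above reduces to a handful of calls. The sketch below mirrors the calls in the test rather than documenting a canonical recipe; the repo paths and the "ingest" run name are placeholders.

import os
import tempfile

from lsst.daf.butler import Butler

src = Butler("/path/to/src")  # placeholder: an existing, populated repo
datasets = list(src.registry.queryDatasets(..., collections=...))

with tempfile.TemporaryDirectory() as workDir:
    exportFile = os.path.join(workDir, "exports.yaml")
    # Write a YAML description of the chosen datasets.
    with src.export(filename=exportFile) as export:
        export.saveDatasets(datasets)

    # Create a fresh repo and pull the exported datasets into it,
    # symlinking the files from the source datastore as the test does.
    Butler.makeRepo("/path/to/dest")  # placeholder destination
    dest = Butler("/path/to/dest", run="ingest")
    dest.import_(filename=exportFile,
                 directory=src.datastore.root,
                 transfer="symlink")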
Example no. 2
import io
from typing import Callable, Iterable, Mapping, Optional

from lsst.daf.butler import Butler, DatasetType
from lsst.daf.butler.registry import ConflictingDefinitionError

# Assumed alias: a mapping from each DatasetType to the data IDs that
# should be inserted for it (the real definition lives in this
# function's home module).
DataSetTypeMap = Mapping[DatasetType, Iterable]


def _import(
    yamlBuffer: io.StringIO,
    newButler: Butler,
    inserts: DataSetTypeMap,
    run: Optional[str],
    butlerModifier: Optional[Callable[[Butler], Butler]],
) -> Butler:
    # This method takes the exports from the existing butler, imports
    # them into the newly created butler, and then inserts the datasets
    # that are expected to be produced.

    # Import the existing datasets using "split" mode. "split" is safe
    # because the execution butler is assumed to be able to see all the
    # file locations that the main datastore can see. "split" supports
    # some absolute URIs in the datastore.
    newButler.import_(filename=yamlBuffer, format="yaml", reuseIds=True, transfer="split")

    # If there is a modifier callable, run it to make necessary updates
    # to the new butler.
    if butlerModifier is not None:
        newButler = butlerModifier(newButler)

    # Register datasets to be produced and insert them into the registry
    for dsType, dataIds in inserts.items():
        # There may be inconsistencies with storage class definitions
        # so those differences must be checked.
        try:
            newButler.registry.registerDatasetType(dsType)
        except ConflictingDefinitionError:
            # We do not at this point know whether the dataset type is
            # an intermediate (and so must be able to support conversion
            # from the registry storage class to an input) or solely an output
            # dataset type. Test both compatibilities.
            registryDsType = newButler.registry.getDatasetType(dsType.name)
            if registryDsType.is_compatible_with(dsType) and dsType.is_compatible_with(registryDsType):
                # Ensure that we use the registry type when inserting.
                dsType = registryDsType
            else:
                # Not compatible so re-raise the original exception.
                raise

        newButler.registry.insertDatasets(dsType, dataIds, run)

    return newButler
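A typical caller builds the in-memory export first and then hands the rewound buffer to _import. The sketch below is illustrative only: sourceButler, datasets, newButler, and inserts stand for objects constructed elsewhere, and it assumes Butler.export accepts a file-like object for filename, as the io.StringIO annotation above implies.

import io

yamlBuffer = io.StringIO()
# Export the relevant datasets from the source butler into memory.
with sourceButler.export(filename=yamlBuffer, format="yaml") as export:
    export.saveDatasets(datasets)
yamlBuffer.seek(0)  # rewind so import_ reads from the beginning

newButler = _import(yamlBuffer, newButler, inserts,
                    run="my_run", butlerModifier=None)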
Example no. 3
#!/usr/bin/env python

import os

from lsst.daf.butler import Butler, CollectionType

for filename in ("butler.yaml", "gen3.sqlite3"):
    if os.path.exists(os.path.join("DATA", filename)):
        os.remove(os.path.join("DATA", filename))

Butler.makeRepo("DATA")
butler = Butler("DATA", writeable=True)
butler.import_(filename="DATA/ci_hsc.yaml", transfer=None)
butler.registry.registerCollection("HSC/defaults", type=CollectionType.CHAINED)
butler.registry.setCollectionChain(
    "HSC/defaults",
    [
        "HSC/raw/all",
        "HSC/calib",
        "HSC/masks",
        "refcats",
        "skymaps"
    ]
)
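To confirm the chain was set up as intended, the repo can be reopened and inspected. A minimal sketch, assuming Registry.getCollectionChain is available and returns the ordered children of a CHAINED collection:

from lsst.daf.butler import Butler

butler = Butler("DATA")
# Print the children of the chained collection in search order.
for child in butler.registry.getCollectionChain("HSC/defaults"):
    print(child)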
Example no. 4
    def testChained(self):
        with self.runner.isolated_filesystem():

            # Create a butler and add some chained collections:
            butlerCfg = Butler.makeRepo("here")

            butler1 = Butler(butlerCfg, writeable=True)

            # Replace datastore functions with mocks:
            DatastoreMock.apply(butler1)

            butler1.import_(filename=os.path.join(TESTDIR, "data", "registry",
                                                  "base.yaml"))
            butler1.import_(filename=os.path.join(TESTDIR, "data", "registry",
                                                  "datasets.yaml"))
            registry1 = butler1.registry
            registry1.registerRun("run1")
            registry1.registerCollection("tag1", CollectionType.TAGGED)
            registry1.registerCollection("calibration1",
                                         CollectionType.CALIBRATION)
            registry1.registerCollection("chain1", CollectionType.CHAINED)
            registry1.registerCollection("chain2", CollectionType.CHAINED)
            registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
            registry1.setCollectionChain("chain2", ["calibration1", "run1"])

            # Use the script function to test the query-collections TREE
            # option, because the astropy.table.Table.read method, which we are
            # using for verification elsewhere in this file, seems to strip
            # leading whitespace from columns. This makes it impossible to test
            # the nested TREE output of the query-collections subcommand from
            # the command line interface.
            table = queryCollections("here",
                                     glob=(),
                                     collection_type=CollectionType.all(),
                                     chains="TREE")

            expected = Table(
                array((
                    ("imported_g", "RUN"),
                    ("imported_r", "RUN"),
                    ("run1", "RUN"),
                    ("tag1", "TAGGED"),
                    ("calibration1", "CALIBRATION"),
                    ("chain1", "CHAINED"),
                    ("  tag1", "TAGGED"),
                    ("  run1", "RUN"),
                    ("  chain2", "CHAINED"),
                    ("    calibration1", "CALIBRATION"),
                    ("    run1", "RUN"),
                    ("chain2", "CHAINED"),
                    ("  calibration1", "CALIBRATION"),
                    ("  run1", "RUN"),
                )),
                names=("Name", "Type"),
            )
            self.assertAstropyTablesEqual(table, expected)

            result = self.runner.invoke(cli, ["query-collections", "here"])
            self.assertEqual(result.exit_code, 0, clickResultMsg(result))
            expected = Table(
                array((
                    ("imported_g", "RUN", ""),
                    ("imported_r", "RUN", ""),
                    ("run1", "RUN", ""),
                    ("tag1", "TAGGED", ""),
                    ("calibration1", "CALIBRATION", ""),
                    ("chain1", "CHAINED", "[tag1, run1, chain2]"),
                    ("chain2", "CHAINED", "[calibration1, run1]"),
                )),
                names=("Name", "Type", "Definition"),
            )
            table = readTable(result.output)
            self.assertAstropyTablesEqual(table, expected)

            result = self.runner.invoke(
                cli, ["query-collections", "here", "--chains", "FLATTEN"])
            self.assertEqual(result.exit_code, 0, clickResultMsg(result))
            expected = Table(
                array((
                    ("imported_g", "RUN"),
                    ("imported_r", "RUN"),
                    ("run1", "RUN"),
                    ("tag1", "TAGGED"),
                    ("calibration1", "CALIBRATION"),
                    ("tag1", "TAGGED"),
                    ("run1", "RUN"),
                    ("calibration1", "CALIBRATION"),
                    ("calibration1", "CALIBRATION"),
                    ("run1", "RUN"),
                )),
                names=("Name", "Type"),
            )
            self.assertAstropyTablesEqual(readTable(result.output), expected)
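The same tables can be produced outside the test harness by calling the script-level helper directly, as the TREE case above does. A minimal sketch, assuming the helper lives at lsst.daf.butler.script.queryCollections as the test's usage suggests:

from lsst.daf.butler import CollectionType
from lsst.daf.butler.script import queryCollections

# Same arguments as the TREE invocation in the test, but flattening
# chained collections into their leaf members instead.
table = queryCollections("here",
                         glob=(),
                         collection_type=CollectionType.all(),
                         chains="FLATTEN")
print(table)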