def datasetExists(self, datasetType, dataId={}, **rest):
    """Determines if a dataset file exists.
    @param datasetType    the type of dataset to inquire about.
    @param dataId         the data id of the dataset.
    @param **rest         keyword arguments for the data id.
    @returns True if the dataset exists or is non-file-based.
    """
    dataId = self._combineDicts(dataId, **rest)
    location = self.mapper.map(datasetType, dataId)
    additionalData = location.getAdditionalData()
    storageName = location.getStorageName()
    if storageName in ('BoostStorage', 'FitsStorage', 'PafStorage',
                       'PickleStorage'):
        locations = location.getLocations()
        for locationString in locations:
            logLoc = LogicalLocation(locationString, additionalData)
            if not os.path.exists(logLoc.locString()):
                return False
        return True
    self.log.log(pexLog.Log.WARN,
                 "datasetExists() for non-file storage %s, "
                 "dataset type=%s, keys=%s" %
                 (storageName, datasetType, str(dataId)))
    return True
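# A hedged usage sketch for datasetExists() above. The repository path, the
# 'calexp' dataset type, and the visit/ccd keys are placeholders, and the
# Butler(root) construction is assumed from the surrounding package rather
# than shown in this snippet.
from lsst.daf.persistence import Butler

butler = Butler("/path/to/repo")    # assumed repository root
if butler.datasetExists("calexp", visit=85470982, ccd=23):
    exposure = butler.get("calexp", visit=85470982, ccd=23)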
def butlerWrite(obj, butlerLocation):
    if butlerLocation.getStorageName() != "YamlStorage":
        raise NotImplementedError("RepositoryCfg only supports YamlStorage")
    for location in butlerLocation.getLocations():
        logLoc = LogicalLocation(location, butlerLocation.getAdditionalData())
        if not os.path.exists(os.path.dirname(logLoc.locString())):
            os.makedirs(os.path.dirname(logLoc.locString()))
        with open(logLoc.locString(), 'w') as f:
            yaml.dump(obj, f)
def butlerRead(butlerLocation):
    if butlerLocation.getStorageName() != "YamlStorage":
        raise NotImplementedError("RepositoryCfg only supports YamlStorage")
    ret = []
    for location in butlerLocation.getLocations():
        logLoc = LogicalLocation(location, butlerLocation.getAdditionalData())
        with open(logLoc.locString()) as f:
            cfg = yaml.load(f)
        cfg['accessCfg.storageCfg.root'] = os.path.dirname(location)
        ret.append(cfg)
    return ret
def tearDown(self):
    """Clean up after the test case runs."""
    db = DbStorage()
    db.setPersistLocation(LogicalLocation(self.dbUrlPrefix + 'test_ap'))
    # db.executeSql("DROP DATABASE %(runId)s" % self.substitutions)
    del self.policies
    del self.stages
def preprocess(self):
    self.activeClipboard = self.inputQueue.getNextDataset()
    fpaExposureId0 = self.activeClipboard.get('visit0').get('exposureId')
    fpaExposureId1 = self.activeClipboard.get('visit1').get('exposureId')

    db = DbStorage()
    loc = LogicalLocation("%(dbUrl)")
    db.setPersistLocation(loc)

    db.startTransaction()
    db.executeSql("""
        INSERT INTO Raw_CCD_Exposure
        SELECT DISTINCT rawCCDExposureId, rawFPAExposureId
        FROM Raw_Amp_Exposure
        WHERE rawFPAExposureId = %d OR rawFPAExposureId = %d
        """ % (fpaExposureId0, fpaExposureId1))
    db.executeSql("""
        INSERT INTO Science_CCD_Exposure
        SELECT DISTINCT scienceCCDExposureId, scienceFPAExposureId, scienceCCDExposureId
        FROM Science_Amp_Exposure
        WHERE scienceFPAExposureId = %d OR scienceFPAExposureId = %d
        """ % (fpaExposureId0, fpaExposureId1))
    db.executeSql("""
        INSERT INTO Science_FPA_Exposure
        SELECT DISTINCT scienceFPAExposureId
        FROM Science_Amp_Exposure
        WHERE scienceFPAExposureId = %d OR scienceFPAExposureId = %d
        """ % (fpaExposureId0, fpaExposureId1))
    db.endTransaction()
def process(self):
    clipboard = self.inputQueue.getNextDataset()
    additionalData = Utils.createAdditionalData(self, self._policy, clipboard)

    templateLocation = self._policy.get('templateLocation')
    templatePath = LogicalLocation(templateLocation, additionalData).locString()
    metadata = afwImage.readMetadata(templatePath)
    dims = afwImage.PointI(metadata.get("NAXIS1"), metadata.get("NAXIS2"))

    outputKey = self._policy.get('outputKey')
    clipboard.put(outputKey, dims)
    self.outputQueue.addDataset(clipboard)
def put(self, obj, datasetType, dataId={}, **rest):
    """Persists a dataset given an output collection data id.
    @param obj            the object to persist.
    @param datasetType    the type of dataset to persist.
    @param dataId         the data id.
    @param **rest         keyword arguments for the data id.
    """
    dataId = self._combineDicts(dataId, **rest)
    location = self.mapper.map(datasetType, dataId)
    self.log.log(pexLog.Log.DEBUG, "Put type=%s keys=%s to %s" %
                 (datasetType, dataId, str(location)))
    additionalData = location.getAdditionalData()
    storageName = location.getStorageName()
    locations = location.getLocations()
    # TODO support multiple output locations
    locationString = locations[0]
    logLoc = LogicalLocation(locationString, additionalData)
    trace = pexLog.BlockTimingLog(self.log, "put",
                                  pexLog.BlockTimingLog.INSTRUM + 1)
    trace.setUsageFlags(trace.ALLUDATA)

    if storageName == "PickleStorage":
        trace.start("write to %s(%s)" % (storageName, logLoc.locString()))
        outDir = os.path.dirname(logLoc.locString())
        if outDir != "" and not os.path.exists(outDir):
            try:
                os.makedirs(outDir)
            except OSError, e:
                # Don't fail if directory exists due to race
                if e.errno != 17:
                    raise e
        with open(logLoc.locString(), "wb") as outfile:
            cPickle.dump(obj, outfile, cPickle.HIGHEST_PROTOCOL)
        trace.done()
        return
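# A hedged usage sketch for put() above. The 'srcMatch' dataset type and the
# visit/ccd keys are invented; they would need to map to a PickleStorage
# entry in the repository's mapper policy for the pickle branch to apply.
butler = Butler("/path/to/repo")                # assumed, as in the earlier sketch
matches = {"matched": [], "unmatched": []}      # any picklable object
butler.put(matches, "srcMatch", visit=85470982, ccd=23)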
def _read(self, pythonType, location):
    trace = pexLog.BlockTimingLog(self.log, "read",
                                  pexLog.BlockTimingLog.INSTRUM + 1)
    additionalData = location.getAdditionalData()
    # Create a list of Storages for the item.
    storageName = location.getStorageName()
    results = []
    locations = location.getLocations()
    returnList = True
    if len(locations) == 1:
        returnList = False
    for locationString in locations:
        logLoc = LogicalLocation(locationString, additionalData)
        trace.start("read from %s(%s)" % (storageName, logLoc.locString()))
        if storageName == "PafStorage":
            finalItem = pexPolicy.Policy.createPolicy(logLoc.locString())
        elif storageName == "PickleStorage":
            if not os.path.exists(logLoc.locString()):
                raise RuntimeError, \
                    "No such pickle file: " + logLoc.locString()
            with open(logLoc.locString(), "rb") as infile:
                finalItem = cPickle.load(infile)
        else:
            storageList = StorageList()
            storage = self.persistence.getRetrieveStorage(storageName, logLoc)
            storageList.append(storage)
            itemData = self.persistence.unsafeRetrieve(
                location.getCppType(), storageList, additionalData)
            finalItem = pythonType.swigConvert(itemData)
        trace.done()
        results.append(finalItem)
    if not returnList:
        results = results[0]
    return results
def read(self, butlerLocation):
    """Read from a butlerLocation.
    :param butlerLocation: the location & formatting for the object(s) to be read.
    :return: a list of objects as described by the butler location. One item
             for each location in butlerLocation.getLocations()
    """
    additionalData = butlerLocation.getAdditionalData()
    # Create a list of Storages for the item.
    storageName = butlerLocation.getStorageName()
    results = []
    locations = butlerLocation.getLocations()

    pythonType = butlerLocation.getPythonType()
    if pythonType is not None:
        if isinstance(pythonType, basestring):
            # import this pythonType dynamically
            pythonTypeTokenList = pythonType.split('.')
            importClassString = pythonTypeTokenList.pop()
            importClassString = importClassString.strip()
            importPackage = ".".join(pythonTypeTokenList)
            importType = __import__(importPackage, globals(), locals(),
                                    [importClassString], -1)
            pythonType = getattr(importType, importClassString)
    # see note re. discomfort with the name 'butlerWrite' in the write method,
    # above. Same applies to butlerRead.
    if hasattr(pythonType, 'butlerRead'):
        results = pythonType.butlerRead(butlerLocation=butlerLocation)
        return results

    for locationString in locations:
        logLoc = LogicalLocation(locationString, additionalData)
        if storageName == "PafStorage":
            finalItem = pexPolicy.Policy.createPolicy(logLoc.locString())
        elif storageName == "YamlStorage":
            finalItem = Policy(filePath=logLoc.locString())
        elif storageName == "PickleStorage":
            if not os.path.exists(logLoc.locString()):
                raise RuntimeError, "No such pickle file: " + logLoc.locString()
            with open(logLoc.locString(), "rb") as infile:
                finalItem = cPickle.load(infile)
        elif storageName == "FitsCatalogStorage":
            if not os.path.exists(logLoc.locString()):
                raise RuntimeError, "No such FITS catalog file: " + logLoc.locString()
            hdu = additionalData.getInt("hdu", 0)
            flags = additionalData.getInt("flags", 0)
            finalItem = pythonType.readFits(logLoc.locString(), hdu, flags)
        elif storageName == "ConfigStorage":
            if not os.path.exists(logLoc.locString()):
                raise RuntimeError, "No such config file: " + logLoc.locString()
            finalItem = pythonType()
            finalItem.load(logLoc.locString())
        else:
            storageList = StorageList()
            storage = self.persistence.getRetrieveStorage(storageName, logLoc)
            storageList.append(storage)
            itemData = self.persistence.unsafeRetrieve(
                butlerLocation.getCppType(), storageList, additionalData)
            finalItem = pythonType.swigConvert(itemData)
        results.append(finalItem)
    return results
from lsst.pex.policy import Policy, PolicyString
from lsst.daf.persistence import LogicalLocation
from lsst.daf.base import PropertySet
from lsst.pex.logging import Log

# Log.getDefaultLog().setThreshold(Log.WARN)
rootlogger = Log.getDefaultLog()
rootlogger.setThreshold(Log.WARN)

testdir = os.path.join(os.environ["CTRL_SCHED_DIR"], "tests")
exampledir = os.path.join(os.environ["CTRL_SCHED_DIR"], "examples")
bbdir = os.path.join(testdir, "testbb")

locations = PropertySet()
locations.set("input", testdir)
LogicalLocation.setLocationMap(locations)


class AbstractSchedulerTestCase(unittest.TestCase):

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def testNoCtor(self):
        self.assertRaises(RuntimeError, Scheduler, None)

    def testNoRecognizeImpl(self):
        t = Scheduler(None, fromSubclass=True)
        self.assertRaises(RuntimeError, t.processDataset, None, True)
        # self.assertRaises(RuntimeError, t.makeJobsAvailable)
def lookupFilterId(self, filterName):
    dbLocation = LogicalLocation("%(dbUrl)")
    filterDb = afwImage.Filter(dbLocation, filterName)
    filterId = filterDb.getId()
    return filterId
def setUp(self):
    # Turn on tracing
    log.Trace.setVerbosity('', 10)
    log.ScreenLog.createDefaultLog(True, log.Log.INFO)

    # Eventually, these should be read from a policy somewhere
    self.dbServer = 'lsst10.ncsa.uiuc.edu'
    self.dbPort = '3306'
    self.dbType = 'mysql'
    if not DbAuth.available(self.dbServer, self.dbPort):
        self.fail("Cannot access database server %s:%s" %
                  (self.dbServer, self.dbPort))

    # Construct test run database name
    self.runId = DbAuth.username(self.dbServer, self.dbPort) + \
        time.strftime("_test_ap_%y%m%d_%H%M%S", time.gmtime())

    # Tweak these to run on different input data, or with a different
    # number of slices
    self.universeSize = 2
    self.visitId = 708125
    self.filter = 'u'
    self.ra = 333.880166667
    self.dec = -17.7374166667

    self.dbUrlPrefix = ''.join([self.dbType, '://', self.dbServer, ':',
                                self.dbPort, '/'])
    self.dbUrl = self.dbUrlPrefix + self.runId

    self.substitutions = {'visitId': self.visitId,
                          'filter': self.filter,
                          'runId': self.runId}

    # Create a database specifically for the test (copy relevant
    # tables from the test_ap database)
    mysqlStatements = [
        """CREATE DATABASE %(runId)s""",
        """USE %(runId)s""",
        """CREATE TABLE VarObject LIKE test_ap.Object""",
        """CREATE TABLE NonVarObject LIKE test_ap.Object""",
        """CREATE TABLE DIASource LIKE test_ap.DIASource""",
        """CREATE TABLE prv_Filter LIKE test_ap.prv_Filter""",
        """INSERT INTO prv_Filter SELECT * FROM test_ap.prv_Filter""",
        """CREATE TABLE _tmp_v%(visitId)d_DIASource LIKE test_ap._tmp_v%(visitId)d_DIASource""",
        """INSERT INTO _tmp_v%(visitId)d_DIASource SELECT * FROM test_ap._tmp_v%(visitId)d_DIASource""",
        """CREATE TABLE _tmp_v%(visitId)d_Preds LIKE test_ap._tmp_v%(visitId)d_Preds""",
        """INSERT INTO _tmp_v%(visitId)d_Preds SELECT * FROM test_ap._tmp_v%(visitId)d_Preds""",
        """CREATE TABLE _tmpl_MatchPair LIKE test_ap._tmpl_MatchPair""",
        """CREATE TABLE _tmpl_IdPair LIKE test_ap._tmpl_IdPair""",
        """CREATE TABLE _tmpl_InMemoryObject LIKE test_ap._tmpl_InMemoryObject""",
        """CREATE TABLE _tmpl_InMemoryMatchPair LIKE test_ap._tmpl_InMemoryMatchPair""",
        """CREATE TABLE _tmpl_InMemoryId LIKE test_ap._tmpl_InMemoryId""",
        """CREATE TABLE _ap_DIASourceToObjectMatches LIKE test_ap._ap_DIASourceToObjectMatches""",
        """CREATE TABLE _ap_PredToDIASourceMatches LIKE test_ap._ap_PredToDIASourceMatches""",
        """CREATE TABLE _ap_DIASourceToNewObject LIKE test_ap._ap_DIASourceToNewObject""",
        """CREATE TABLE _mops_Prediction LIKE test_ap._mops_Prediction"""
    ]
    db = DbStorage()
    db.setPersistLocation(LogicalLocation(self.dbUrlPrefix + 'test_ap'))
    try:
        for stmt in mysqlStatements:
            db.executeSql(stmt % self.substitutions)

        # Specify list of stages ...
        self.stages = [
            ap.LoadStage,
            InputStage,
            ap.MatchDiaSourcesStage,
            OutputStage,
            InputStage,
            ap.MatchMopsPredsStage,
            OutputStage,
            ap.StoreStage
        ]

        # ... and read in stage policy for each stage
        policyDir = os.path.join(os.environ['AP_DIR'], 'pipeline',
                                 'examples', 'policy')
        self.policies = [
            Policy(os.path.join(policyDir, 'LoadStage.paf')),
            Policy(os.path.join(policyDir, 'MatchDiaSourcesStageInput.paf')),
            None,
            Policy(os.path.join(policyDir, 'MatchDiaSourcesStageOutput.paf')),
            Policy(os.path.join(policyDir, 'MatchMopsPredsStageInput.paf')),
            None,
            Policy(os.path.join(policyDir, 'MatchMopsPredsStageOutput.paf')),
            Policy(os.path.join(policyDir, 'StoreStage.paf'))
        ]

        # construct PropertySet for string interpolation
        psSubs = PropertySet()
        psSubs.setInt('visitId', self.visitId)
        psSubs.setString('runId', self.runId)
        psSubs.setString('filter', self.filter)
        psSubs.setString('work', '.')
        psSubs.setString('input', '/tmp')
        psSubs.setString('output', '/tmp')
        psSubs.setString('update', '/tmp')
        psSubs.setString('dbUrl', self.dbUrl)
        LogicalLocation.setLocationMap(psSubs)
    except:
        # cleanup database in case of error
        db.executeSql("DROP DATABASE %(runId)s" % self.substitutions)
        raise
def testSubst(self):
    ad = PropertySet()
    ad.set("foo", "bar")
    ad.setInt("x", 3)
    LogicalLocation.setLocationMap(ad)
    loc = LogicalLocation("%(foo)xx")
    self.assertEqual(loc.locString(), "barxx")
    loc = LogicalLocation("%(x)foo")
    self.assertEqual(loc.locString(), "3foo")
    loc = LogicalLocation("yy%04d(x)yy")
    self.assertEqual(loc.locString(), "yy0003yy")
    ad2 = PropertySet()
    ad2.set("foo", "baz")
    ad2.setInt("y", 2009)
    loc = LogicalLocation("%(foo)%(x)%(y)", ad2)
    self.assertEqual(loc.locString(), "bar32009")
    LogicalLocation.setLocationMap(PropertySet())
    loc = LogicalLocation("%(foo)%3d(y)", ad2)
    self.assertEqual(loc.locString(), "baz2009")
def __init__(self, runId, activityName, platform, dbLoc, globalDbLoc,
             activOffset=0, runOffset=None, logger=None):
    """
    Initialize a ProvenanceRecorder.
    @param runId         the unique production run ID
    @param activityName  a name for the activity that provenance is being
                           recorded for.  On the launch platform this should
                           be the name of the production run (not the runid).
                           On a workflow platform (where pipelines are run),
                           this should be the name of the workflow.
    @param platform      a logical name for the platform where this class
                           has been instantiated.  This is not typically a
                           DNS name, but it can be.  This is usually the
                           name from the platform policy.
    @param dbLoc         the URL representing the production run-specific
                           database
    @param globalDbLoc   the URL representing the global database shared by
                           all production runs.
    @param activOffset   the integer ID assigned to the current workflow by
                           the orchestration layer, unique to the runid.  On
                           the launch platform, this should be zero.  On the
                           workflow platforms, this is n for the nth
                           workflow listed in the production run policy
                           file.
    @param runOffset     the integer ID assigned to this run (runId) by the
                           database.  This should be None when instantiating
                           from the launch platform; in this case, the run
                           will be initialized to assign the runOffset
                           (which can later be retrieved via
                           getRunOffset()).  On workflow platforms the
                           runOffset must be provided to properly associate
                           workflow provenance with the right production
                           run.
    @param logger        a Log instance to use for messages
    """
    ProvenanceRecorder.__init__(self, logger, True)
    self._runid = runId
    self._activName = activityName
    self._platName = platform

    # the index for this production run
    self._roffset = runOffset
    # the index for this activity (launch process or workflow)
    self._aoffset = activOffset

    self._rundb = DbStorage()
    self._rundb.setPersistLocation(LogicalLocation(dbLoc))
    self._globalLoc = LogicalLocation(globalDbLoc)
    self._globalDb = DbStorage()
    self._globalDb.setPersistLocation(self._globalLoc)

    self.initialize()
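# A hedged construction sketch for the recorder initialized above. The class
# name DbProvenanceRecorder and both database URLs are invented; the
# parameter meanings come from the docstring, and getRunOffset() is the
# accessor the docstring itself mentions.
recorder = DbProvenanceRecorder(
    runId="rlp0123",
    activityName="DC3b-production",   # production-run name on the launch platform
    platform="abe",                   # logical platform name from the platform policy
    dbLoc="mysql://lsst10.ncsa.uiuc.edu:3306/rlp0123_prov",
    globalDbLoc="mysql://lsst10.ncsa.uiuc.edu:3306/GlobalDB",
    activOffset=0,                    # zero on the launch platform
    runOffset=None)                   # let the database assign the run offset
runOffset = recorder.getRunOffset()   # hand this to workflow-platform recorders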
def write(self, butlerLocation, obj):
    """Writes an object to the location and persistence format specified by a
    ButlerLocation.

    :param butlerLocation: the location & formatting for the object to be written.
    :param obj: the object to be written.
    :return: None
    """
    self.log.log(pexLog.Log.DEBUG, "Put location=%s obj=%s" %
                 (butlerLocation, obj))

    additionalData = butlerLocation.getAdditionalData()
    storageName = butlerLocation.getStorageName()
    locations = butlerLocation.getLocations()

    pythonType = butlerLocation.getPythonType()
    if pythonType is not None:
        if isinstance(pythonType, basestring):
            # import this pythonType dynamically
            pythonTypeTokenList = pythonType.split('.')
            importClassString = pythonTypeTokenList.pop()
            importClassString = importClassString.strip()
            importPackage = ".".join(pythonTypeTokenList)
            importType = __import__(importPackage, globals(), locals(),
                                    [importClassString], -1)
            pythonType = getattr(importType, importClassString)
    # todo this effectively defines the butler posix "do serialize" command to
    # be named "put". This has implications; right now I'm worried that any
    # python type that can be written to disk and has a method called 'put'
    # will be called here (even if it's e.g. destined for FitsStorage). We
    # might want a somewhat more specific API.
    if hasattr(pythonType, 'butlerWrite'):
        pythonType.butlerWrite(obj, butlerLocation=butlerLocation)
        return

    with SafeFilename(locations[0]) as locationString:
        logLoc = LogicalLocation(locationString, additionalData)

        if storageName == "PickleStorage":
            with open(logLoc.locString(), "wb") as outfile:
                cPickle.dump(obj, outfile, cPickle.HIGHEST_PROTOCOL)
            return

        if storageName == "ConfigStorage":
            obj.save(logLoc.locString())
            return

        if storageName == "FitsCatalogStorage":
            flags = additionalData.getInt("flags", 0)
            obj.writeFits(logLoc.locString(), flags=flags)
            return

        # Create a list of Storages for the item.
        storageList = StorageList()
        storage = self.persistence.getPersistStorage(storageName, logLoc)
        storageList.append(storage)

        if storageName == 'FitsStorage':
            self.persistence.persist(obj, storageList, additionalData)
            return

        # Persist the item.
        if hasattr(obj, '__deref__'):
            # We have a smart pointer, so dereference it.
            self.persistence.persist(obj.__deref__(), storageList, additionalData)
        else:
            self.persistence.persist(obj, storageList, additionalData)
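# A minimal sketch (not from the LSST codebase; the class name and the YAML
# file handling are invented) of a python type that opts in to the
# butlerWrite/butlerRead hooks checked in write() above and in read(). It
# follows the calling convention used there and by the RepositoryCfg
# functions earlier: butlerWrite(obj, butlerLocation) to persist, and
# butlerRead(butlerLocation) returning a list with one object per location.
import yaml


class IllustrativeCfg(object):
    """Illustrative only; wraps a plain dict of settings."""

    def __init__(self, settings=None):
        self.settings = settings or {}

    @staticmethod
    def butlerWrite(obj, butlerLocation):
        # write() above calls this instead of any of its storage branches
        for locationString in butlerLocation.getLocations():
            with open(locationString, 'w') as f:
                yaml.dump(obj.settings, f)

    @staticmethod
    def butlerRead(butlerLocation):
        # read() expects a list back: one object per location
        results = []
        for locationString in butlerLocation.getLocations():
            with open(locationString) as f:
                results.append(IllustrativeCfg(yaml.load(f)))
        return results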