def run(self, input: Union[TaskMetadata, Dict[str, int]]) -> Struct:  # type: ignore
    """Run the task, adding the configured key-value pair to the input
    argument and returning it as the output.

    Parameters
    ----------
    input : `dict` or `lsst.pipe.base.TaskMetadata`
        Mapping to update and return.

    Returns
    -------
    result : `lsst.pipe.base.Struct`
        Struct with a single ``output`` attribute.
    """
    self.log.info("Run method given data of type: %s", get_full_type_name(input))
    output = input.copy()
    output[self.config.key] = self.config.value

    # Can change the return type via configuration.
    if "TaskMetadata" in self.config.outputSC:
        output = TaskMetadata.from_dict(output)  # type: ignore
    elif type(output) == TaskMetadata:
        # Want the output to be a dict.
        output = output.to_dict()

    self.log.info("Run method returns data of type: %s", get_full_type_name(output))
    return Struct(output=output)
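# A minimal sketch of the dict <-> TaskMetadata round trip that run() above
# relies on (assuming TaskMetadata is imported from lsst.pipe.base, as the
# docstring's Struct reference suggests).
from lsst.pipe.base import TaskMetadata

payload = {"counter": 1}
meta = TaskMetadata.from_dict(payload)  # wrap the plain dict
assert meta.to_dict() == payload        # and convert back unchanged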
def register(self, registry, update=False):
    # Docstring inherited from Instrument.register
    camera = self.getCamera()
    # The maximum values below make Gen3's ObservationDataIdPacker produce
    # outputs that match Gen2's ccdExposureId.
    obsMax = 21474800
    with registry.transaction():
        registry.syncDimensionData(
            "instrument",
            {
                "name": self.getName(),
                "detector_max": 200,
                "visit_max": obsMax,
                "exposure_max": obsMax,
                "class_name": get_full_type_name(self),
                # Some schemas support default visit_system
                "visit_system": VisitSystem.ONE_TO_ONE.value,
            },
            update=update)
        for detector in camera:
            registry.syncDimensionData(
                "detector",
                {
                    "instrument": self.getName(),
                    "id": detector.getId(),
                    "full_name": detector.getName(),
                    # TODO: make sure these definitions are consistent with
                    # those extracted by astro_metadata_translator, and
                    # test that they remain consistent somehow.
                    "name_in_raft": detector.getName().split("_")[1],
                    "raft": detector.getName().split("_")[0],
                    "purpose": str(detector.getType()).split(".")[-1],
                },
                update=update)
        self._registerFilters(registry, update=update)
def register(self, registry, update=False):
    camera = self.getCamera()
    obsMax = 2**31
    with registry.transaction():
        registry.syncDimensionData(
            "instrument",
            {
                "name": self.getName(),
                "detector_max": 36,
                "visit_max": obsMax,
                "exposure_max": obsMax,
                "class_name": get_full_type_name(self),
                # Some schemas support default visit_system
                "visit_system": VisitSystem.ONE_TO_ONE.value,
            },
            update=update)
        for detector in camera:
            registry.syncDimensionData(
                "detector",
                {
                    "instrument": self.getName(),
                    "id": detector.getId(),
                    "full_name": detector.getName(),
                    "name_in_raft": detector.getName(),
                    "raft": None,  # MegaPrime does not have rafts
                    "purpose": str(detector.getType()).split(".")[-1],
                },
                update=update)
        self._registerFilters(registry, update=update)
def register(self, registry, update=False):
    detector_max = 2
    record = {
        "instrument": self.getName(),
        "class_name": get_full_type_name(DummyInstrument),
        "detector_max": detector_max,
    }
    with registry.transaction():
        registry.syncDimensionData("instrument", record, update=update)
def writeFits(self, filename):
    """Write this object to a file.

    Parameters
    ----------
    filename : `str`
        Name of file to write.
    """
    self._refresh_metadata()
    type_name = get_full_type_name(self)
    writeFits(filename, self._stamps, self._metadata, type_name, self.use_mask,
              self.use_variance, self.use_archive)
def testUnderscores(self):
    # Underscores are filtered out unless they can't be, either
    # because __init__.py did not import it or there is a clash with
    # the non-underscore version.
    for test_name in (
        "import_test.two._four.simple.Simple",
        "import_test.two._four.clash.Simple",
        "import_test.two.clash.Simple",
    ):
        test_cls = get_class_of(test_name)
        self.assertTrue(test_cls.true())
        full = get_full_type_name(test_cls)
        self.assertEqual(full, test_name)
def testTypeNames(self):
    # Check types and also an object
    tests = [
        (getPackageDir, "lsst.utils.getPackageDir"),  # underscore filtered out
        (int, "int"),
        (0, "int"),
        ("", "str"),
        (doImport, "lsst.utils.doImport.doImport"),  # no underscore
        (Counter, "collections.Counter"),
        (Counter(), "collections.Counter"),
        (lsst.utils, "lsst.utils"),
    ]

    for item, typeName in tests:
        self.assertEqual(get_full_type_name(item), typeName)
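# A small round-trip sketch of the behaviour exercised by the tests above:
# a name produced by get_full_type_name can be handed back to get_class_of
# to recover the original type. The `from lsst.utils import ...` path is an
# assumption; the tests above only show that both helpers are available.
from collections import Counter

from lsst.utils import get_class_of, get_full_type_name

name = get_full_type_name(Counter())     # "collections.Counter"
assert get_class_of(name) is Counter
assert get_full_type_name(int) == "int"  # builtins report just the bare name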
def register(self, registry, update=False):
    # Docstring inherited from Instrument.register
    # The maximum values below make Gen3's ObservationDataIdPacker produce
    # outputs that match Gen2's ccdExposureId.
    obsMax = self.translatorClass.max_exposure_id()
    with registry.transaction():
        registry.syncDimensionData(
            "instrument",
            {
                "name": self.getName(),
                "detector_max": self.translatorClass.DETECTOR_MAX,
                "visit_max": obsMax,
                "exposure_max": obsMax,
                "class_name": get_full_type_name(self),
            },
            update=update)
        for detector in self.getCamera():
            registry.syncDimensionData(
                "detector",
                self.extractDetectorRecord(detector),
                update=update)
        self._registerFilters(registry, update=update)
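# A minimal sketch of how a register() implementation like the ones above is
# typically driven. The repository path and the SomeInstrument class are
# hypothetical; only Butler itself comes from lsst.daf.butler.
from lsst.daf.butler import Butler

butler = Butler("/path/to/repo", writeable=True)  # hypothetical repo path
SomeInstrument().register(butler.registry)        # records instrument, detector and filter dimension data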
def updateMetadata(self, camera=None, detector=None, filterName=None,
                   setCalibId=False, setCalibInfo=False, setDate=False,
                   **kwargs):
    """Update metadata keywords with new values.

    Parameters
    ----------
    camera : `lsst.afw.cameraGeom.Camera`, optional
        Reference camera to use to set ``_instrument`` field.
    detector : `lsst.afw.cameraGeom.Detector`, optional
        Reference detector to use to set ``_detector*`` fields.
    filterName : `str`, optional
        Filter name to assign to this calibration.
    setCalibId : `bool`, optional
        Construct the ``_calibId`` field from other fields.
    setCalibInfo : `bool`, optional
        Set calibration parameters from metadata.
    setDate : `bool`, optional
        Ensure the metadata ``CALIBDATE`` fields are set to the current
        datetime.
    kwargs : `dict` or `collections.abc.Mapping`, optional
        Set of ``key=value`` pairs to assign to the metadata.
    """
    mdOriginal = self.getMetadata()
    mdSupplemental = dict()

    for k, v in kwargs.items():
        if isinstance(v, fits.card.Undefined):
            kwargs[k] = None

    if setCalibInfo:
        self.calibInfoFromDict(kwargs)

    if camera:
        self._instrument = camera.getName()

    if detector:
        self._detectorName = detector.getName()
        self._detectorSerial = detector.getSerial()
        self._detectorId = detector.getId()
        if "_" in self._detectorName:
            (self._raftName, self._slotName) = self._detectorName.split("_")

    if filterName:
        # TODO DM-28093: I think this whole comment can go away, if we
        # always use physicalLabel everywhere in ip_isr.
        # If set via:
        # exposure.getInfo().getFilter().getName()
        # then this will hold the abstract filter.
        self._filter = filterName

    if setDate:
        date = datetime.datetime.now()
        mdSupplemental["CALIBDATE"] = date.isoformat()
        mdSupplemental["CALIB_CREATION_DATE"] = date.date().isoformat()
        mdSupplemental["CALIB_CREATION_TIME"] = date.time().isoformat()

    if setCalibId:
        values = []
        if self._instrument:
            values.append(f"instrument={self._instrument}")
        if self._raftName:
            values.append(f"raftName={self._raftName}")
        if self._detectorName:
            values.append(f"detectorName={self._detectorName}")
        if self._detectorId:
            values.append(f"detector={self._detectorId}")
        if self._filter:
            values.append(f"filter={self._filter}")

        calibDate = mdOriginal.get("CALIBDATE", mdSupplemental.get("CALIBDATE", None))
        if calibDate:
            values.append(f"calibDate={calibDate}")

        self._calibId = " ".join(values)

    self._metadata["INSTRUME"] = self._instrument if self._instrument else None
    self._metadata["RAFTNAME"] = self._raftName if self._raftName else None
    self._metadata["SLOTNAME"] = self._slotName if self._slotName else None
    self._metadata["DETECTOR"] = self._detectorId
    self._metadata["DET_NAME"] = self._detectorName if self._detectorName else None
    self._metadata["DET_SER"] = self._detectorSerial if self._detectorSerial else None
    self._metadata["FILTER"] = self._filter if self._filter else None
    self._metadata["CALIB_ID"] = self._calibId if self._calibId else None
    self._metadata["CALIBCLS"] = get_full_type_name(self)

    mdSupplemental.update(kwargs)
    mdOriginal.update(mdSupplemental)
def setUp(self):
    config = Config(
        {
            "version": 1,
            "namespace": "pipe_base_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )

    universe = DimensionUniverse(config=config)

    # need to make a mapping of TaskDef to set of quantum
    quantumMap = {}
    tasks = []
    for task, label in (
        (Dummy1PipelineTask, "R"),
        (Dummy2PipelineTask, "S"),
        (Dummy3PipelineTask, "T"),
        (Dummy4PipelineTask, "U"),
    ):
        config = task.ConfigClass()
        taskDef = TaskDef(get_full_type_name(task), config, task, label)
        tasks.append(taskDef)
        quantumSet = set()
        connections = taskDef.connections
        for a, b in ((1, 2), (3, 4)):
            if connections.initInputs:
                initInputDSType = DatasetType(
                    connections.initInput.name,
                    tuple(),
                    storageClass=connections.initInput.storageClass,
                    universe=universe,
                )
                initRefs = [DatasetRef(initInputDSType, DataCoordinate.makeEmpty(universe))]
            else:
                initRefs = None
            inputDSType = DatasetType(
                connections.input.name,
                connections.input.dimensions,
                storageClass=connections.input.storageClass,
                universe=universe,
            )
            inputRefs = [
                DatasetRef(inputDSType, DataCoordinate.standardize({"A": a, "B": b}, universe=universe))
            ]
            outputDSType = DatasetType(
                connections.output.name,
                connections.output.dimensions,
                storageClass=connections.output.storageClass,
                universe=universe,
            )
            outputRefs = [
                DatasetRef(outputDSType, DataCoordinate.standardize({"A": a, "B": b}, universe=universe))
            ]
            quantumSet.add(
                Quantum(
                    taskName=task.__qualname__,
                    dataId=DataCoordinate.standardize({"A": a, "B": b}, universe=universe),
                    taskClass=task,
                    initInputs=initRefs,
                    inputs={inputDSType: inputRefs},
                    outputs={outputDSType: outputRefs},
                )
            )
        quantumMap[taskDef] = quantumSet

    self.tasks = tasks
    self.quantumMap = quantumMap
    self.qGraph = QuantumGraph(quantumMap, metadata=METADATA)
    self.universe = universe
def _buildSaveObject(
    self, returnHeader: bool = False
) -> Union[bytearray, Tuple[bytearray, Dict]]:
    # make some containers
    jsonData: Deque[bytes] = deque()
    # node map is a list because json does not accept mapping keys that
    # are not strings, so we store a list of key, value pairs that will
    # be converted to a mapping on load
    nodeMap = []
    taskDefMap = {}
    headerData: Dict[str, Any] = {}

    # Store the QuantumGraph BuildId; this will allow validating BuildIds
    # at load time, prior to loading any QuantumNodes. Name chosen for
    # unlikely conflicts.
    headerData["GraphBuildID"] = self.graphID
    headerData["Metadata"] = self._metadata

    # counter for the number of bytes processed thus far
    count = 0

    # serialize out the task Defs, recording the start and end bytes of
    # each taskDef
    inverseLookup = self._datasetDict.inverse
    taskDef: TaskDef
    # sort by task label to ensure serialization happens in the same order
    for taskDef in self.taskGraph:
        # compressing has very little impact on saving or load time, but
        # a large impact on on-disk size, so it is worth doing
        taskDescription = {}
        # save the fully qualified name.
        taskDescription["taskName"] = get_full_type_name(taskDef.taskClass)
        # save the config as a text stream that will be un-persisted on
        # the other end
        stream = io.StringIO()
        taskDef.config.saveToStream(stream)
        taskDescription["config"] = stream.getvalue()
        taskDescription["label"] = taskDef.label

        inputs = []
        outputs = []

        # Determine the connections between all of the tasks and save them
        # in the header as a list of connections and edges in each task.
        # This will help in un-persisting, and possibly in a "quick view"
        # method that does not require everything to be un-persisted.
        #
        # Typing returns can't be parameter dependent
        for connection in inverseLookup[taskDef]:  # type: ignore
            consumers = self._datasetDict.getConsumers(connection)
            producer = self._datasetDict.getProducer(connection)
            if taskDef in consumers:
                # This checks if the task consumes the connection directly
                # from the datastore or it is produced by another task
                producerLabel = producer.label if producer is not None else "datastore"
                inputs.append((producerLabel, connection))
            elif taskDef not in consumers and producer is taskDef:
                # If there are no consumers for this task's produced
                # connection, the output will be said to be the datastore,
                # in which case the for loop will be a zero length loop
                if not consumers:
                    outputs.append(("datastore", connection))
                for td in consumers:
                    outputs.append((td.label, connection))

        # dump to json string, and encode that string to bytes and then
        # compress those bytes
        dump = lzma.compress(json.dumps(taskDescription).encode())

        # record the sizing and relation information
        taskDefMap[taskDef.label] = {
            "bytes": (count, count + len(dump)),
            "inputs": inputs,
            "outputs": outputs,
        }
        count += len(dump)
        jsonData.append(dump)

    headerData["TaskDefs"] = taskDefMap

    # serialize the nodes, recording the start and end bytes of each node
    dimAccumulator = DimensionRecordsAccumulator()
    for node in self:
        # compressing has very little impact on saving or load time, but
        # a large impact on on-disk size, so it is worth doing
        simpleNode = node.to_simple(accumulator=dimAccumulator)
        dump = lzma.compress(simpleNode.json().encode())
        jsonData.append(dump)
        nodeMap.append(
            (
                str(node.nodeId),
                {
                    "bytes": (count, count + len(dump)),
                    "inputs": [str(n.nodeId) for n in self.determineInputsToQuantumNode(node)],
                    "outputs": [str(n.nodeId) for n in self.determineOutputsOfQuantumNode(node)],
                },
            )
        )
        count += len(dump)

    headerData["DimensionRecords"] = {
        key: value.dict()
        for key, value in dimAccumulator.makeSerializedDimensionRecordMapping().items()
    }

    # need to serialize this as a series of key, value tuples because of
    # a limitation on how json can't do anything but strings as keys
    headerData["Nodes"] = nodeMap

    # dump the headerData to json
    header_encode = lzma.compress(json.dumps(headerData).encode())

    # pack the save format version number
    save_bytes = struct.pack(STRUCT_FMT_BASE, SAVE_VERSION)

    # pack the length of the compressed header using the format associated
    # with this save version
    fmt_string = DESERIALIZER_MAP[SAVE_VERSION].FMT_STRING()
    map_lengths = struct.pack(fmt_string, len(header_encode))

    # write each component of the save out in a deterministic order
    # buffer = io.BytesIO()
    # buffer.write(map_lengths)
    # buffer.write(taskDef_pickle)
    # buffer.write(map_pickle)
    buffer = bytearray()
    buffer.extend(MAGIC_BYTES)
    buffer.extend(save_bytes)
    buffer.extend(map_lengths)
    buffer.extend(header_encode)

    # Iterate over jsonData, and for each element pop the leftmost entry off
    # the deque and write it out. This is to save memory: as each element is
    # added to the buffer object, it is removed from the container.
    #
    # Only this section needs to worry about memory pressure because
    # everything else written to the buffer prior to this JSON data is
    # only on the order of kilobytes to low numbers of megabytes.
    while jsonData:
        buffer.extend(jsonData.popleft())

    if returnHeader:
        return buffer, headerData
    else:
        return buffer
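# A minimal reader sketch for the byte layout produced by _buildSaveObject
# above: magic prefix, packed save version, packed header length, then the
# lzma-compressed JSON header (the per-task and per-node blobs follow and are
# addressed by the byte ranges recorded in that header). The magic bytes and
# struct format strings are passed in as parameters because their concrete
# values are defined elsewhere and are not shown in this snippet.
import json
import lzma
import struct
from typing import Dict


def read_save_header(data: bytes, magic: bytes, version_fmt: str, length_fmt: str) -> Dict:
    """Decode the compressed JSON header from a saved QuantumGraph buffer."""
    if not data.startswith(magic):
        raise ValueError("Buffer does not start with the expected magic bytes")
    offset = len(magic)
    (version,) = struct.unpack_from(version_fmt, data, offset)  # would normally select a deserializer
    offset += struct.calcsize(version_fmt)
    (header_size,) = struct.unpack_from(length_fmt, data, offset)
    offset += struct.calcsize(length_fmt)
    header = json.loads(lzma.decompress(data[offset:offset + header_size]))
    return header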