def loadFileList(self, filelist, selection, scanlist=None):
    """
    loadFileList(self, filelist, selection, scanlist=None)

    filelist is the list of file names belonging to the stack
    selection is a dictionary with the keys x, y, m.
    x is the path to the x data (the channels) in the spectrum,
    without the first level "directory". It is unused (for now).
    y is the path to the 1D data (the counts) in the spectrum,
    without the first level "directory".
    m is the path to the normalizing data (I0 or whatever)
    without the first level "directory".
    scanlist is the list of first level "directories" containing the
    1D data.

    Example: The actual path has the form /whatever1/whatever2/counts
    That means scanlist = ["/whatever1"]
    and selection['y'] = "/whatever2/counts"
    """
    _logger.info("filelist = %s", filelist)
    _logger.info("selection = %s", selection)
    _logger.info("scanlist = %s", scanlist)
    # all the files in the same source
    hdfStack = NexusDataSource.NexusDataSource(filelist)
    # if there is more than one file, it is assumed all the files have
    # the same structure.
    tmpHdf = hdfStack._sourceObjectList[0]
    entryNames = []
    for key in tmpHdf["/"].keys():
        try:
            if isinstance(tmpHdf["/" + key], h5py.Group):
                entryNames.append(key)
        except KeyError:
            _logger.info("Broken link with key? <%s>" % key)
    # build the selection in terms of HDF5 terms
    # for the time being only the first x selection is used
    xSelectionList = selection.get('x', None)
    if xSelectionList == []:
        xSelectionList = None
    if xSelectionList is not None:
        if type(xSelectionList) != type([]):
            xSelectionList = [xSelectionList]
        if len(xSelectionList):
            xSelection = xSelectionList[0]
        else:
            xSelection = None
    else:
        xSelection = None
    # only one y is taken
    ySelection = selection['y']
    if type(ySelection) == type([]):
        ySelectionList = list(ySelection)
        ySelection = ySelection[0]
    else:
        ySelectionList = [ySelection]
    # monitor selection
    mSelection = selection.get('m', None)
    if mSelection not in [None, []]:
        if type(mSelection) != type([]):
            mSelection = [mSelection]
        if type(mSelection) == type([]):
            if len(mSelection):
                mSelection = mSelection[0]
            else:
                mSelection = None
    else:
        mSelection = None
    USE_JUST_KEYS = False
    # deal with the pathological case where the scanlist corresponds
    # to a selected top level dataset
    if len(entryNames) == 0:
        if scanlist is not None:
            if (ySelection in scanlist) or \
               (xSelection in scanlist) or \
               (mSelection in scanlist):
                scanlist = None
                USE_JUST_KEYS = True
        else:
            USE_JUST_KEYS = True
    elif len(entryNames) == 1:
        # deal with the SOLEIL case of one entry but with different name
        # in different files
        USE_JUST_KEYS = True
    elif scanlist in [None, []]:
        USE_JUST_KEYS = True
    if USE_JUST_KEYS:
        # if the scanlist is None, it is assumed we are interested in all
        # the scans containing the selection, not that all the scans
        # contain the selection.
        scanlist = []
        if 0:
            JUST_KEYS = False
            # expect same entry names in the files
            # Unfortunately this does not work for SOLEIL
            for entry in entryNames:
                path = "/" + entry + ySelection
                dirname = posixpath.dirname(path)
                base = posixpath.basename(path)
                try:
                    file_entry = tmpHdf[dirname]
                    if base in file_entry.keys():
                        scanlist.append(entry)
                except:
                    pass
        else:
            JUST_KEYS = True
            # expect same structure in the files even if the
            # names are different (SOLEIL ...)
            if len(entryNames):
                i = 0
                for entry in entryNames:
                    i += 1
                    path = "/" + entry + ySelection
                    dirname = posixpath.dirname(path)
                    base = posixpath.basename(path)
                    try:
                        file_entry = tmpHdf[dirname]
                        if hasattr(file_entry, "keys"):
                            if base in file_entry.keys():
                                # this is the case of a selection
                                # inside a group
                                scanlist.append("1.%d" % i)
                    except KeyError:
                        _logger.warning("%s not in file, ignoring.", dirname)
                if not len(scanlist):
                    if not ySelection.startswith("/"):
                        path = "/" + ySelection
                    else:
                        path = ySelection
                    dirname = posixpath.dirname(path)
                    base = posixpath.basename(path)
                    try:
                        if dirname in tmpHdf["/"]:
                            # this is the case of a dataset at top level
                            # or having given the complete path
                            if base in tmpHdf[dirname]:
                                JUST_KEYS = False
                                scanlist.append("")
                        elif base in file_entry.keys():
                            JUST_KEYS = False
                            scanlist.append("")
                    except:
                        # it will crash later on
                        pass
            else:
                JUST_KEYS = False
                scanlist.append("")
    else:
        try:
            number, order = [int(x) for x in scanlist[0].split(".")]
            JUST_KEYS = True
        except:
            JUST_KEYS = False
        if not JUST_KEYS:
            for scan in scanlist:
                if scan.startswith("/"):
                    t = scan[1:]
                else:
                    t = scan
                if t not in entryNames:
                    raise ValueError("Entry %s not in file" % scan)
    nFiles = len(filelist)
    nScans = len(scanlist)
    if JUST_KEYS:
        if not nScans:
            raise IOError("No entry contains the required data")
    _logger.debug("Retained number of files = %d", nFiles)
    _logger.debug("Retained number of scans = %d", nScans)
    # Now we have to decide the number of mca ...
    # I assume all the scans contain the same number of mca
    if JUST_KEYS:
        path = "/" + entryNames[int(scanlist[0].split(".")[-1]) - 1] + \
               ySelection
        if mSelection is not None:
            mpath = "/" + entryNames[int(scanlist[0].split(".")[-1]) - 1] + \
                    mSelection
        if xSelectionList is not None:
            xpathList = []
            for xSelection in xSelectionList:
                xpath = "/" + entryNames[int(scanlist[0].split(".")[-1]) - 1] + \
                        xSelection
                xpathList.append(xpath)
    else:
        path = scanlist[0] + ySelection
        if mSelection is not None:
            mpath = scanlist[0] + mSelection
        if xSelectionList is not None:
            xpathList = []
            for xSelection in xSelectionList:
                xpath = scanlist[0] + xSelection
                xpathList.append(xpath)
    yDataset = tmpHdf[path]
    if (self.__dtype is None) or (mSelection is not None):
        self.__dtype = yDataset.dtype
        if self.__dtype in [numpy.int16, numpy.uint16]:
            self.__dtype = numpy.float32
        elif self.__dtype in [numpy.int32, numpy.uint32]:
            if mSelection:
                self.__dtype = numpy.float32
            else:
                self.__dtype = numpy.float64
        elif self.__dtype not in [numpy.float16,
                                  numpy.float32,
                                  numpy.float64]:
            # Some datasets from CLS (origin APS?) arrive as data format
            # equal to ">u2" and are not triggered as integer types
            _logger.debug("Not basic dataset type %s", self.__dtype)
            if ("%s" % self.__dtype).endswith("2"):
                self.__dtype = numpy.float32
            else:
                if mSelection:
                    self.__dtype = numpy.float32
                else:
                    self.__dtype = numpy.float64
    # figure out the shape of the stack
    shape = yDataset.shape
    mcaIndex = selection.get('index', len(shape) - 1)
    if mcaIndex == -1:
        mcaIndex = len(shape) - 1
    _logger.debug("mcaIndex = %d", mcaIndex)
    considerAsImages = False
    dim0, dim1, mcaDim = self.getDimensions(nFiles, nScans, shape,
                                            index=mcaIndex)
    try:
        if self.__dtype in [numpy.float32, numpy.int32]:
            bytefactor = 4
        elif self.__dtype in [numpy.int16, numpy.uint16]:
            bytefactor = 2
        elif self.__dtype in [numpy.int8, numpy.uint8]:
            bytefactor = 1
        else:
            bytefactor = 8
        neededMegaBytes = nFiles * dim0 * dim1 * \
                          (mcaDim * bytefactor / (1024 * 1024.))
        _logger.info("Using %d bytes per item" % bytefactor)
        _logger.info("Needed %d Megabytes" % neededMegaBytes)
        physicalMemory = None
        if hasattr(PhysicalMemory, "getAvailablePhysicalMemoryOrNone"):
            physicalMemory = PhysicalMemory.getAvailablePhysicalMemoryOrNone()
        if not physicalMemory:
            physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
        else:
            _logger.info("Available physical memory %.1f GBytes" %
                         (physicalMemory / (1024 * 1024 * 1024.)))
        if physicalMemory is None:
            # 6 Gigabytes of available memory
            # should be a good compromise in 2018
            physicalMemory = 6000
            _logger.info("Assumed physical memory %.1f MBytes" %
                         physicalMemory)
        else:
            physicalMemory /= (1024 * 1024.)
            _logger.info("Using physical memory %.1f GBytes" %
                         (physicalMemory / 1024))
        if (neededMegaBytes > (0.95 * physicalMemory)) \
           and (nFiles == 1) and (len(shape) == 3):
            if self.__dtype0 is None:
                if (bytefactor == 8) and \
                   (neededMegaBytes < (2 * physicalMemory)):
                    # try reading as float32
                    print("Forcing the use of float32 data")
                    self.__dtype = numpy.float32
                else:
                    raise MemoryError("Force dynamic loading")
            else:
                raise MemoryError("Force dynamic loading")
        if (mcaIndex == 0) and (nFiles == 1) and (nScans == 1):
            # keep the original arrangement but in memory
            self.data = numpy.zeros(yDataset.shape, self.__dtype)
            considerAsImages = True
        else:
            # force arrangement as spectra
            self.data = numpy.zeros((dim0, dim1, mcaDim), self.__dtype)
        DONE = False
    except (MemoryError, ValueError):
        # some versions report ValueError instead of MemoryError
        if (nFiles == 1) and (len(shape) == 3):
            _logger.warning("Attempting dynamic loading")
            if mSelection is not None:
                _logger.warning("Ignoring monitor")
            self.data = yDataset
            if mSelection is not None:
                mdtype = tmpHdf[mpath].dtype
                if mdtype not in [numpy.float64, numpy.float32]:
                    mdtype = numpy.float64
                mDataset = numpy.asarray(tmpHdf[mpath], dtype=mdtype)
                self.monitor = [mDataset]
            if xSelectionList is not None:
                if len(xpathList) == 1:
                    xpath = xpathList[0]
                    xDataset = tmpHdf[xpath][()]
                    self.x = [xDataset]
            if h5py.version.version < '2.0':
                # prevent automatic closing keeping a reference
                # to the open file
                self._fileReference = hdfStack
            DONE = True
        else:
            # what to do if the number of dimensions is only 2?
            raise
    # get the positioners information associated to the path
    positioners = {}
    try:
        positionersGroup = NexusTools.getPositionersGroup(tmpHdf, path)
        for motorName, motorValues in positionersGroup.items():
            positioners[motorName] = motorValues[()]
    except:
        positionersGroup = None
        positioners = {}
    # get the mca information associated to the path
    mcaObjectPaths = NexusTools.getMcaObjectPaths(tmpHdf, path)
    _time = None
    _calibration = None
    _channels = None
    if considerAsImages:
        self._pathHasRelevantInfo = False
    else:
        numberOfRelevantInfoKeys = 0
        for objectPath in mcaObjectPaths:
            if objectPath not in ["counts", "target"]:
                numberOfRelevantInfoKeys += 1
        if numberOfRelevantInfoKeys:
            # not just "counts" or "target"
            self._pathHasRelevantInfo = True
            if "live_time" in mcaObjectPaths:
                if DONE:
                    # hopefully it will fit into memory
                    if mcaObjectPaths["live_time"] in tmpHdf:
                        _time = tmpHdf[mcaObjectPaths["live_time"]][()]
                    elif "::" in mcaObjectPaths["live_time"]:
                        tmpFileName, tmpDatasetPath = \
                                mcaObjectPaths["live_time"].split("::")
                        with h5py.File(tmpFileName, "r") as tmpH5:
                            _time = tmpH5[tmpDatasetPath][()]
                    else:
                        del mcaObjectPaths["live_time"]
                else:
                    # we have to have as many live times as MCA spectra
                    _time = numpy.zeros(
                            (self.data.shape[0] * self.data.shape[1]),
                            dtype=numpy.float64)
            elif "elapsed_time" in mcaObjectPaths:
                if DONE:
                    # hopefully it will fit into memory
                    if mcaObjectPaths["elapsed_time"] in tmpHdf:
                        _time = tmpHdf[mcaObjectPaths["elapsed_time"]][()]
                    elif "::" in mcaObjectPaths["elapsed_time"]:
                        tmpFileName, tmpDatasetPath = \
                                mcaObjectPaths["elapsed_time"].split("::")
                        with h5py.File(tmpFileName, "r") as tmpH5:
                            _time = tmpH5[tmpDatasetPath][()]
                    else:
                        del mcaObjectPaths["elapsed_time"]
                else:
                    # we have to have as many elapsed times as MCA spectra
                    _time = numpy.zeros(
                            (self.data.shape[0] * self.data.shape[1]),
                            numpy.float32)
            if "calibration" in mcaObjectPaths:
                if mcaObjectPaths["calibration"] in tmpHdf:
                    _calibration = tmpHdf[mcaObjectPaths["calibration"]][()]
                elif "::" in mcaObjectPaths["calibration"]:
                    tmpFileName, tmpDatasetPath = \
                            mcaObjectPaths["calibration"].split("::")
                    with h5py.File(tmpFileName, "r") as tmpH5:
                        _calibration = tmpH5[tmpDatasetPath][()]
                else:
                    del mcaObjectPaths["calibration"]
            if "channels" in mcaObjectPaths:
                if mcaObjectPaths["channels"] in tmpHdf:
                    _channels = tmpHdf[mcaObjectPaths["channels"]][()]
                elif "::" in mcaObjectPaths["channels"]:
                    tmpFileName, tmpDatasetPath = \
                            mcaObjectPaths["channels"].split("::")
                    with h5py.File(tmpFileName, "r") as tmpH5:
                        _channels = tmpH5[tmpDatasetPath][()]
                else:
                    del mcaObjectPaths["channels"]
        else:
            self._pathHasRelevantInfo = False
    if (not DONE) and (not considerAsImages):
        _logger.info("Data in memory as spectra")
        self.info["McaIndex"] = 2
        n = 0
        if dim0 == 1:
            self.onBegin(dim1)
        else:
            self.onBegin(dim0)
        self.incrProgressBar = 0
        for hdf in hdfStack._sourceObjectList:
            entryNames = list(hdf["/"].keys())
            goodEntryNames = []
            for entry in entryNames:
                tmpPath = "/" + entry
                try:
                    if hasattr(hdf[tmpPath], "keys"):
                        goodEntryNames.append(entry)
                except KeyError:
                    _logger.info("Broken link with key? <%s>" % tmpPath)
            for scan in scanlist:
                IN_MEMORY = None
                nStart = n
                for ySelection in ySelectionList:
                    n = nStart
                    if JUST_KEYS:
                        entryName = goodEntryNames[int(scan.split(".")[-1]) - 1]
                        path = entryName + ySelection
                        if mSelection is not None:
                            mpath = entryName + mSelection
                            mdtype = hdf[mpath].dtype
                            if mdtype not in [numpy.float64, numpy.float32]:
                                mdtype = numpy.float64
                            mDataset = numpy.asarray(hdf[mpath], dtype=mdtype)
                        if xSelectionList is not None:
                            xDatasetList = []
                            for xSelection in xSelectionList:
                                xpath = entryName + xSelection
                                xDataset = hdf[xpath][()]
                                xDatasetList.append(xDataset)
                    else:
                        path = scan + ySelection
                        if mSelection is not None:
                            mpath = scan + mSelection
                            mdtype = hdf[mpath].dtype
                            if mdtype not in [numpy.float64, numpy.float32]:
                                mdtype = numpy.float64
                            mDataset = numpy.asarray(hdf[mpath], dtype=mdtype)
                        if xSelectionList is not None:
                            xDatasetList = []
                            for xSelection in xSelectionList:
                                xpath = scan + xSelection
                                xDataset = hdf[xpath][()]
                                xDatasetList.append(xDataset)
                    try:
                        yDataset = hdf[path]
                        tmpShape = yDataset.shape
                        totalBytes = numpy.ones((1,), yDataset.dtype).itemsize
                        for nItems in tmpShape:
                            totalBytes *= nItems
                        # should one be conservative or just try?
                        if (totalBytes / (1024. * 1024.)) > \
                           (0.4 * physicalMemory):
                            _logger.info("Force dynamic loading of spectra")
                            # read from disk
                            IN_MEMORY = False
                        else:
                            # read the data into memory
                            _logger.info("Attempt to load whole map into memory")
                            yDataset = hdf[path][()]
                            IN_MEMORY = True
                    except (MemoryError, ValueError):
                        _logger.info("Dynamic loading of spectra")
                        yDataset = hdf[path]
                        IN_MEMORY = False
                    nMcaInYDataset = 1
                    for dim in yDataset.shape:
                        nMcaInYDataset *= dim
                    nMcaInYDataset = int(nMcaInYDataset / mcaDim)
                    timeData = None
                    if _time is not None:
                        if "live_time" in mcaObjectPaths:
                            # it is assumed that all have the same structure!!!
                            timePath = NexusTools.getMcaObjectPaths(
                                            hdf, path)["live_time"]
                        elif "elapsed_time" in mcaObjectPaths:
                            timePath = NexusTools.getMcaObjectPaths(
                                            hdf, path)["elapsed_time"]
                        if timePath in hdf:
                            timeData = hdf[timePath][()]
                        elif "::" in timePath:
                            externalFile, externalPath = timePath.split("::")
                            with h5py.File(externalFile, "r") as timeHdf:
                                timeData = timeHdf[externalPath][()]
                    if mcaIndex != 0:
                        if IN_MEMORY:
                            yDataset.shape = -1, mcaDim
                        if mSelection is not None:
                            case = -1
                            nMonitorData = 1
                            for v in mDataset.shape:
                                nMonitorData *= v
                            if nMonitorData == nMcaInYDataset:
                                mDataset.shape = nMcaInYDataset
                                case = 0
                            elif nMonitorData == (nMcaInYDataset * mcaDim):
                                case = 1
                                mDataset.shape = nMcaInYDataset, mcaDim
                            if case == -1:
                                raise ValueError(
                                    "I do not know how to handle this monitor data")
                        if timeData is not None:
                            case = -1
                            nTimeData = 1
                            for v in timeData.shape:
                                nTimeData *= v
                            if nTimeData == nMcaInYDataset:
                                timeData.shape = nMcaInYDataset
                                case = 0
                                _time[nStart:nStart + nMcaInYDataset] += timeData
                            if case == -1:
                                _logger.warning(
                                    "I do not know how to handle this time data")
                                _logger.warning("Ignoring time information")
                                _time = None
                        if (len(yDataset.shape) == 3) and \
                           (dim1 == yDataset.shape[1]):
                            mca = 0
                            deltaI = int(yDataset.shape[1] / dim1)
                            for ii in range(yDataset.shape[0]):
                                i = int(n / dim1)
                                yData = yDataset[ii:(ii + 1)]
                                yData.shape = -1, mcaDim
                                if mSelection is not None:
                                    if case == 0:
                                        mData = numpy.outer(
                                                mDataset[mca:(mca + dim1)],
                                                numpy.ones((mcaDim)))
                                        self.data[i, :, :] += yData / mData
                                    elif case == 1:
                                        mData = mDataset[mca:(mca + dim1), :]
                                        mData.shape = -1, mcaDim
                                        self.data[i, :, :] += yData / mData
                                else:
                                    self.data[i:(i + deltaI), :] += yData
                                n += yDataset.shape[1]
                                mca += dim1
                        else:
                            for mca in range(nMcaInYDataset):
                                i = int(n / dim1)
                                j = n % dim1
                                if len(yDataset.shape) == 3:
                                    ii = int(mca / yDataset.shape[1])
                                    jj = mca % yDataset.shape[1]
                                    yData = yDataset[ii, jj]
                                elif len(yDataset.shape) == 2:
                                    yData = yDataset[mca, :]
                                elif len(yDataset.shape) == 1:
                                    yData = yDataset
                                if mSelection is not None:
                                    if case == 0:
                                        self.data[i, j, :] += yData / mDataset[mca]
                                    elif case == 1:
                                        self.data[i, j, :] += yData / mDataset[mca, :]
                                else:
                                    self.data[i, j, :] += yData
                                n += 1
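
# Illustrative call (a sketch: the file names, dataset paths and the owning
# "stack" instance below are assumptions, not part of the module):
#
#     selection = {'x': ["/measurement/mca_0/channels"],   # unused for now
#                  'y': ["/measurement/mca_0/counts"],
#                  'm': ["/measurement/I0"]}
#     stack.loadFileList(["map_0001.h5", "map_0002.h5"],
#                        selection,
#                        scanlist=["/entry"])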
def loadFileList(self, filelist, fileindex=0):
    if type(filelist) == type(''):
        filelist = [filelist]
    self.__keyList = []
    self.sourceName = filelist
    self.__indexedStack = True
    self.sourceType = SOURCE_TYPE
    self.info = {}
    self.nbFiles = len(filelist)
    # read first edf file
    # get information
    tempEdf = EdfFileDataSource.EdfFileDataSource(filelist[0])
    keylist = tempEdf.getSourceInfo()['KeyList']
    nImages = len(keylist)
    dataObject = tempEdf.getDataObject(keylist[0])
    self.info.update(dataObject.info)
    if len(dataObject.data.shape) == 3:
        # this is already a stack
        self.data = dataObject.data
        self.__nFiles = 1
        self.__nImagesPerFile = nImages
        shape = self.data.shape
        for i in range(len(shape)):
            key = 'Dim_%d' % (i + 1,)
            self.info[key] = shape[i]
        self.info["SourceType"] = SOURCE_TYPE
        self.info["SourceName"] = filelist[0]
        self.info["Size"] = 1
        self.info["NumberOfFiles"] = 1
        self.info["FileIndex"] = fileindex
        return
    arrRet = dataObject.data
    if self.__dtype is None:
        self.__dtype = arrRet.dtype
    self.onBegin(self.nbFiles)
    singleImageShape = arrRet.shape
    actualImageStack = False
    if (fileindex == 2) or (self.__imageStack):
        self.__imageStack = True
        if len(singleImageShape) == 1:
            # single line
            # be ready for specfile stack?
            self.onEnd()
            raise IOError("Not implemented yet")
            self.data = numpy.zeros((arrRet.shape[0],
                                     nImages,
                                     self.nbFiles),
                                    self.__dtype)
            self.incrProgressBar = 0
            for tempEdfFileName in filelist:
                tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                for i in range(nImages):
                    pieceOfStack = tempEdf.GetData(i)
                    self.data[:, i, self.incrProgressBar] = pieceOfStack[:]
                self.incrProgressBar += 1
                self.onProgress(self.incrProgressBar)
            self.onEnd()
        else:
            if nImages > 1:
                # this is not the common case
                # should I try to convert it to a standard one
                # using a 3D matrix or keep it as a 4D matrix?
                if self.nbFiles > 1:
                    raise IOError(
                        "Multiple files with multiple images not implemented yet")
                self.data = numpy.zeros((arrRet.shape[0],
                                         arrRet.shape[1],
                                         nImages * self.nbFiles),
                                        self.__dtype)
                self.incrProgressBar = 0
                for tempEdfFileName in filelist:
                    tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                    for i in range(nImages):
                        pieceOfStack = tempEdf.GetData(i)
                        self.data[:, :, nImages * self.incrProgressBar + i] = \
                            pieceOfStack[:, :]
                    self.incrProgressBar += 1
            else:
                # this is the common case
                try:
                    # calculate needed megabytes
                    if self.__dtype == numpy.float64:
                        bytefactor = 8
                    else:
                        bytefactor = 4
                    needed_ = self.nbFiles * \
                              arrRet.shape[0] * \
                              arrRet.shape[1] * bytefactor
                    physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
                    if physicalMemory is not None:
                        # spare 5% of memory
                        if physicalMemory < (1.05 * needed_):
                            raise MemoryError(
                                "Not enough physical memory available")
                    if self.__imageStack:
                        self.data = numpy.zeros((self.nbFiles,
                                                 arrRet.shape[0],
                                                 arrRet.shape[1]),
                                                self.__dtype)
                        self.incrProgressBar = 0
                        for tempEdfFileName in filelist:
                            tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                            pieceOfStack = tempEdf.GetData(0)
                            self.data[self.incrProgressBar] = pieceOfStack
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                        actualImageStack = True
                    else:
                        self.data = numpy.zeros((arrRet.shape[0],
                                                 arrRet.shape[1],
                                                 self.nbFiles),
                                                self.__dtype)
                        self.incrProgressBar = 0
                        for tempEdfFileName in filelist:
                            tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                            pieceOfStack = tempEdf.GetData(0)
                            self.data[:, :, self.incrProgressBar] = pieceOfStack
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                except (MemoryError, ValueError):
                    hdf5done = False
                    if HDF5 and (('PyMcaQt' in sys.modules) or
                                 ('PyMca.PyMcaQt' in sys.modules)):
                        from PyMca5 import PyMcaQt as qt
                        from PyMca5 import ArraySave
                        msg = qt.QMessageBox.information(
                                None,
                                "Memory error\n",
                                "Do you want to convert your data to HDF5?\n",
                                qt.QMessageBox.Yes,
                                qt.QMessageBox.No)
                        if msg != qt.QMessageBox.No:
                            hdf5file = qt.QFileDialog.getSaveFileName(
                                    None,
                                    "Please select output file name",
                                    os.path.dirname(filelist[0]),
                                    "HDF5 files *.h5")
                            if not len(hdf5file):
                                raise IOError("Invalid output file")
                            hdf5file = qt.safe_str(hdf5file)
                            if not hdf5file.endswith(".h5"):
                                hdf5file += ".h5"
                            hdf, self.data = ArraySave.getHDF5FileInstanceAndBuffer(
                                    hdf5file,
                                    (self.nbFiles,
                                     arrRet.shape[0],
                                     arrRet.shape[1]))
                            self.incrProgressBar = 0
                            for tempEdfFileName in filelist:
                                tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                                pieceOfStack = tempEdf.GetData(0)
                                self.data[self.incrProgressBar, :, :] = \
                                    pieceOfStack[:, :]
                                hdf.flush()
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                            hdf5done = True
                    if not hdf5done:
                        for i in range(3):
                            print("\7")
                        samplingStep = None
                        i = 2
                        while samplingStep is None:
                            print("**************************************************")
                            print(" Memory error!, attempting %dx%d sampling reduction " % (i, i))
                            print("**************************************************")
                            s1, s2 = arrRet[::i, ::i].shape
                            try:
                                self.data = numpy.zeros((s1, s2, self.nbFiles),
                                                        self.__dtype)
                                samplingStep = i
                            except:
                                i += 1
                        self.incrProgressBar = 0
                        for tempEdfFileName in filelist:
                            tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                            pieceOfStack = tempEdf.GetData(0)
                            self.data[:, :, self.incrProgressBar] = \
                                pieceOfStack[::samplingStep, ::samplingStep]
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
            self.onEnd()
    else:
        self.__imageStack = False
        if len(singleImageShape) == 1:
            # single line
            # be ready for specfile stack?
            raise IOError("Not implemented yet")
            self.data = numpy.zeros((self.nbFiles,
                                     arrRet.shape[0],
                                     nImages),
                                    self.__dtype)
            self.incrProgressBar = 0
            for tempEdfFileName in filelist:
                tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                for i in range(nImages):
                    pieceOfStack = tempEdf.GetData(i)
                    self.data[self.incrProgressBar, :, i] = pieceOfStack[:]
                self.incrProgressBar += 1
                self.onProgress(self.incrProgressBar)
            self.onEnd()
        else:
            if nImages > 1:
                # this is not the common case
                # should I try to convert it to a standard one
                # using a 3D matrix or keep it as a 4D matrix?
                if self.nbFiles > 1:
                    if (arrRet.shape[0] > 1) and \
                       (arrRet.shape[1] > 1):
                        raise IOError(
                            "Multiple files with multiple images not implemented yet")
                    elif arrRet.shape[0] == 1:
                        self.data = numpy.zeros((self.nbFiles,
                                                 arrRet.shape[0] * nImages,
                                                 arrRet.shape[1]),
                                                self.__dtype)
                        self.incrProgressBar = 0
                        for tempEdfFileName in filelist:
                            tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                            for i in range(nImages):
                                pieceOfStack = tempEdf.GetData(i)
                                self.data[self.incrProgressBar, i, :] = \
                                    pieceOfStack[:, :]
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                    elif arrRet.shape[1] == 1:
                        self.data = numpy.zeros((self.nbFiles,
                                                 arrRet.shape[1] * nImages,
                                                 arrRet.shape[0]),
                                                self.__dtype)
                        self.incrProgressBar = 0
                        for tempEdfFileName in filelist:
                            tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                            for i in range(nImages):
                                pieceOfStack = tempEdf.GetData(i)
                                self.data[self.incrProgressBar, i, :] = \
                                    pieceOfStack[:, :]
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                else:
                    self.data = numpy.zeros((nImages * self.nbFiles,
                                             arrRet.shape[0],
                                             arrRet.shape[1]),
                                            self.__dtype)
                    self.incrProgressBar = 0
                    for tempEdfFileName in filelist:
                        tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                        for i in range(nImages):
                            pieceOfStack = tempEdf.GetData(i)
                            self.data[nImages * self.incrProgressBar + i, :, :] = \
                                pieceOfStack[:, :]
                        self.incrProgressBar += 1
                        self.onProgress(self.incrProgressBar)
                self.onEnd()
            else:
                if fileindex == 1:
                    try:
                        self.data = numpy.zeros((arrRet.shape[0],
                                                 self.nbFiles,
                                                 arrRet.shape[1]),
                                                self.__dtype)
                    except:
                        try:
                            self.data = numpy.zeros((arrRet.shape[0],
                                                     self.nbFiles,
                                                     arrRet.shape[1]),
                                                    numpy.float32)
                        except:
                            self.data = numpy.zeros((arrRet.shape[0],
                                                     self.nbFiles,
                                                     arrRet.shape[1]),
                                                    numpy.int16)
                else:
                    try:
                        # calculate needed megabytes
                        if self.__dtype == numpy.float64:
                            bytefactor = 8
                        else:
                            bytefactor = 4
                        needed_ = self.nbFiles * \
                                  arrRet.shape[0] * \
                                  arrRet.shape[1] * 4
                        physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
                        if physicalMemory is not None:
                            # spare 5% of memory
                            if physicalMemory < (1.05 * needed_):
                                raise MemoryError(
                                    "Not enough physical memory available")
                        self.data = numpy.zeros((self.nbFiles,
                                                 arrRet.shape[0],
                                                 arrRet.shape[1]),
                                                self.__dtype)
                    except:
                        try:
                            needed_ = self.nbFiles * \
                                      arrRet.shape[0] * \
                                      arrRet.shape[1] * 4
                            physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
                            if physicalMemory is not None:
                                # spare 5% of memory
                                if physicalMemory < (1.05 * needed_):
                                    raise MemoryError(
                                        "Not enough physical memory available")
                            self.data = numpy.zeros((self.nbFiles,
                                                     arrRet.shape[0],
                                                     arrRet.shape[1]),
                                                    numpy.float32)
                        except (MemoryError, ValueError):
                            text = "Memory Error: Attempt subsampling or convert to HDF5"
                            if HDF5 and (('PyMcaQt' in sys.modules) or
                                         ('PyMca.PyMcaQt' in sys.modules)):
                                from PyMca5 import PyMcaQt as qt
                                from PyMca5 import ArraySave
                                msg = qt.QMessageBox.information(
                                        None,
                                        "Memory error\n",
                                        "Do you want to convert your data to HDF5?\n",
                                        qt.QMessageBox.Yes,
                                        qt.QMessageBox.No)
                                if msg == qt.QMessageBox.No:
                                    raise MemoryError(text)
                                hdf5file = qt.QFileDialog.getSaveFileName(
                                        None,
                                        "Please select output file name",
                                        os.path.dirname(filelist[0]),
                                        "HDF5 files *.h5")
                                if not len(hdf5file):
                                    raise IOError("Invalid output file")
                                hdf5file = qt.safe_str(hdf5file)
                                if not hdf5file.endswith(".h5"):
                                    hdf5file += ".h5"
                                hdf, self.data = ArraySave.getHDF5FileInstanceAndBuffer(
                                        hdf5file,
                                        (self.nbFiles,
                                         arrRet.shape[0],
                                         arrRet.shape[1]))
                            else:
                                raise MemoryError("Memory Error")
                self.incrProgressBar = 0
                if fileindex == 1:
                    for tempEdfFileName in filelist:
                        tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                        pieceOfStack = tempEdf.GetData(0)
                        self.data[:, self.incrProgressBar, :] = pieceOfStack[:, :]
                        self.incrProgressBar += 1
                        self.onProgress(self.incrProgressBar)
                else:
                    # test for ID24 map
                    ID24 = False
                    if "_sample_" in filelist[0]:
                        bckFile = filelist[0].replace("_sample_", "_samplebk_")
                        if os.path.exists(bckFile):
                            bckData = EdfFile.EdfFile(bckFile).GetData(0)
                        else:
                            bckData = 0
                        i0StartFile = filelist[0].replace("_sample_", "_I0start_")
                        if os.path.exists(i0StartFile):
                            ID24 = True
                            id24idx = 0
                            i0Start = EdfFile.EdfFile(i0StartFile, 'rb').GetData(0).astype(numpy.float64)
                            i0Start -= bckData
                            i0EndFile = filelist[0].replace("_sample_", "_I0end_")
                            i0Slope = 0.0
                            if os.path.exists(i0EndFile):
                                i0End = EdfFile.EdfFile(i0EndFile, 'rb').GetData(0) - bckData
                                i0Slope = (i0End - i0Start) / len(filelist)
                        positionersFile = filelist[0].replace("_sample_", "_positioners_")
                        if os.path.exists(positionersFile):
                            positionersEdf = EdfFile.EdfFile(positionersFile, 'rb')
                            self.info["positioners"] = {}
                            for i in range(positionersEdf.GetNumImages()):
                                motorName = positionersEdf.GetHeader(i).get(
                                        "Title", "Motor_%02d" % i)
                                motorValue = positionersEdf.GetData(i)
                                self.info["positioners"][motorName] = motorValue
                    for tempEdfFileName in filelist:
                        tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                        if ID24:
                            pieceOfStack = -numpy.log(
                                (tempEdf.GetData(0) - bckData) /
                                (i0Start[0, :] + id24idx * i0Slope))
                            pieceOfStack[numpy.isfinite(pieceOfStack) == False] = 1
                            id24idx += 1
                        else:
                            pieceOfStack = tempEdf.GetData(0)
                        try:
                            self.data[self.incrProgressBar, :, :] = \
                                pieceOfStack[:, :]
                        except:
                            if pieceOfStack.shape[1] != arrRet.shape[1]:
                                print(" ERROR on file %s" % tempEdfFileName)
                                print(" DIM 1 error. Assuming missing data were at the end!!!")
                            if pieceOfStack.shape[0] != arrRet.shape[0]:
                                print(" ERROR on file %s" % tempEdfFileName)
                                print(" DIM 0 error. Assuming missing data were at the end!!!")
                            self.data[self.incrProgressBar,
                                      :pieceOfStack.shape[0],
                                      :pieceOfStack.shape[1]] = pieceOfStack[:, :]
                        self.incrProgressBar += 1
                        self.onProgress(self.incrProgressBar)
                self.onEnd()
    self.__nFiles = self.incrProgressBar
    self.__nImagesPerFile = nImages
    shape = self.data.shape
    for i in range(len(shape)):
        key = 'Dim_%d' % (i + 1,)
        self.info[key] = shape[i]
    if not isinstance(self.data, numpy.ndarray):
        hdf.flush()
        self.info["SourceType"] = "HDF5Stack1D"
        if self.__imageStack:
            self.info["McaIndex"] = 0
            self.info["FileIndex"] = 1
        else:
            self.info["McaIndex"] = 2
            self.info["FileIndex"] = 0
        self.info["SourceName"] = [hdf5file]
        self.info["NumberOfFiles"] = 1
        self.info["Size"] = 1
    elif actualImageStack:
        self.info["SourceType"] = SOURCE_TYPE
        self.info["McaIndex"] = 0
        self.info["FileIndex"] = 1
        self.info["SourceName"] = self.sourceName
        self.info["NumberOfFiles"] = self.__nFiles * 1
        self.info["Size"] = self.__nFiles * self.__nImagesPerFile
    else:
        self.info["SourceType"] = SOURCE_TYPE
        self.info["FileIndex"] = fileindex
        self.info["SourceName"] = self.sourceName
        self.info["NumberOfFiles"] = self.__nFiles * 1
        self.info["Size"] = self.__nFiles * self.__nImagesPerFile
    # try to use positioners to compute the scales (ID24 specific)
    xPositionerName = None
    yPositionerName = None
    if "positioners" in self.info and len(self.info["positioners"]) == 2:
        for k, v in self.info["positioners"].items():
            if isinstance(v, numpy.ndarray) and v.ndim == 2:
                deltaDim1 = v[:, 1:] - v[:, :-1]
                deltaDim0 = v[1:, :] - v[:-1, :]
                if numpy.any(deltaDim1) and not numpy.any(deltaDim0):
                    # positioner varying only along dim1
                    xPositionerName = k
                    # should we check that all delta values are equal?
                    deltaX = numpy.mean(deltaDim1)
                    originX = v[0, 0]
                elif numpy.any(deltaDim0) and not numpy.any(deltaDim1):
                    # positioner varying only along dim0
                    yPositionerName = k
                    deltaY = numpy.mean(deltaDim0)
                    originY = v[0, 0]
        if xPositionerName is not None and yPositionerName is not None:
            self.info["xScale"] = (originX, deltaX)
            self.info["yScale"] = (originY, deltaY)
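
# Illustrative calls (a sketch; the file names are assumptions). fileindex
# selects the arrangement: 0 (default) stores one image per file along the
# first axis, 1 stores files along the second axis, and 2 (or a previously
# set image-stack flag) forces the image-stack interpretation:
#
#     stack.loadFileList(["map_%04d.edf" % i for i in range(100)])
#     stack.loadFileList("single_frame.edf", fileindex=2)  # a lone string is accepted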
def loadFileList(self, filelist, selection, scanlist=None):
    """
    loadFileList(self, filelist, selection, scanlist=None)

    filelist is the list of file names belonging to the stack
    selection is a dictionary with the keys x, y, m.
    x is the path to the x data (the channels) in the spectrum,
    without the first level "directory". It is unused (for now).
    y is the path to the 1D data (the counts) in the spectrum,
    without the first level "directory".
    m is the path to the normalizing data (I0 or whatever)
    without the first level "directory".
    scanlist is the list of first level "directories" containing the
    1D data.

    Example: The actual path has the form /whatever1/whatever2/counts
    That means scanlist = ["/whatever1"]
    and selection['y'] = "/whatever2/counts"
    """
    if DEBUG:
        print("filelist = ", filelist)
        print("selection = ", selection)
        print("scanlist = ", scanlist)
    # all the files in the same source
    hdfStack = NexusDataSource.NexusDataSource(filelist)
    # if there is more than one file, it is assumed all the files have
    # the same structure.
    tmpHdf = hdfStack._sourceObjectList[0]
    entryNames = []
    for key in tmpHdf["/"].keys():
        if isinstance(tmpHdf["/" + key], h5py.Group):
            entryNames.append(key)
    # build the selection in terms of HDF5 terms
    # for the time being, only the first item in the x selection is used
    xSelection = selection['x']
    if xSelection is not None:
        if type(xSelection) != type([]):
            xSelection = [xSelection]
    if type(xSelection) == type([]):
        if len(xSelection):
            xSelection = xSelection[0]
        else:
            xSelection = None
    else:
        xSelection = None
    # only one y is taken
    ySelection = selection['y']
    if type(ySelection) == type([]):
        ySelection = ySelection[0]
    # monitor selection
    mSelection = selection['m']
    if mSelection not in [None, []]:
        if type(mSelection) != type([]):
            mSelection = [mSelection]
        if type(mSelection) == type([]):
            if len(mSelection):
                mSelection = mSelection[0]
            else:
                mSelection = None
    else:
        mSelection = None
    USE_JUST_KEYS = False
    # deal with the pathological case where the scanlist corresponds
    # to a selected top level dataset
    if len(entryNames) == 0:
        if scanlist is not None:
            if len(scanlist) == 1:
                if scanlist[0] == ySelection:
                    scanlist = None
    elif len(entryNames) == 1:
        # deal with the SOLEIL case of one entry but with different name
        # in different files
        USE_JUST_KEYS = True
    elif scanlist in [None, []]:
        USE_JUST_KEYS = True
    if USE_JUST_KEYS:
        # if the scanlist is None, it is assumed we are interested in all
        # the scans containing the selection, not that all the scans
        # contain the selection.
        scanlist = []
        if 0:
            JUST_KEYS = False
            # expect same entry names in the files
            # Unfortunately this does not work for SOLEIL
            for entry in entryNames:
                path = "/" + entry + ySelection
                dirname = posixpath.dirname(path)
                base = posixpath.basename(path)
                try:
                    if base in tmpHdf[dirname].keys():
                        scanlist.append(entry)
                except:
                    pass
        else:
            JUST_KEYS = True
            # expect same structure in the files even if the
            # names are different (SOLEIL ...)
            if len(entryNames):
                i = 0
                for entry in entryNames:
                    path = "/" + entry + ySelection
                    dirname = posixpath.dirname(path)
                    base = posixpath.basename(path)
                    if hasattr(tmpHdf[dirname], "keys"):
                        i += 1
                        if base in tmpHdf[dirname].keys():
                            scanlist.append("1.%d" % i)
                if not len(scanlist):
                    path = "/" + ySelection
                    dirname = posixpath.dirname(path)
                    base = posixpath.basename(path)
                    try:
                        if base in tmpHdf[dirname].keys():
                            JUST_KEYS = False
                            scanlist.append("")
                    except:
                        # it will crash later on
                        pass
            else:
                JUST_KEYS = False
                scanlist.append("")
    else:
        try:
            number, order = [int(x) for x in scanlist[0].split(".")]
            JUST_KEYS = True
        except:
            JUST_KEYS = False
        if not JUST_KEYS:
            for scan in scanlist:
                if scan.startswith("/"):
                    t = scan[1:]
                else:
                    t = scan
                if t not in entryNames:
                    raise ValueError("Entry %s not in file" % scan)
    nFiles = len(filelist)
    nScans = len(scanlist)
    if JUST_KEYS:
        if not nScans:
            raise IOError("No entry contains the required data")
    if DEBUG:
        print("Retained number of files = %d" % nFiles)
        print("Retained number of scans = %d" % nScans)
    # Now we have to decide the number of mca ...
    # I assume all the scans contain the same number of mca
    if JUST_KEYS:
        path = "/" + entryNames[int(scanlist[0].split(".")[-1]) - 1] + \
               ySelection
        if mSelection is not None:
            mpath = "/" + entryNames[int(scanlist[0].split(".")[-1]) - 1] + \
                    mSelection
        if xSelection is not None:
            xpath = "/" + entryNames[int(scanlist[0].split(".")[-1]) - 1] + \
                    xSelection
    else:
        path = scanlist[0] + ySelection
        if mSelection is not None:
            mpath = scanlist[0] + mSelection
        if xSelection is not None:
            xpath = scanlist[0] + xSelection
    yDataset = tmpHdf[path]
    if self.__dtype is None:
        self.__dtype = yDataset.dtype
        if self.__dtype in [numpy.int16, numpy.uint16]:
            self.__dtype = numpy.float32
        elif self.__dtype in [numpy.int32, numpy.uint32]:
            self.__dtype = numpy.float64
    # figure out the shape of the stack
    shape = yDataset.shape
    mcaIndex = selection.get('index', len(shape) - 1)
    if mcaIndex == -1:
        mcaIndex = len(shape) - 1
    if DEBUG:
        print("mcaIndex = %d" % mcaIndex)
    considerAsImages = False
    dim0, dim1, mcaDim = self.getDimensions(nFiles, nScans, shape,
                                            index=mcaIndex)
    try:
        if self.__dtype in [numpy.float32, numpy.int32]:
            bytefactor = 4
        elif self.__dtype in [numpy.int16, numpy.uint16]:
            bytefactor = 2
        elif self.__dtype in [numpy.int8, numpy.uint8]:
            bytefactor = 1
        else:
            bytefactor = 8
        neededMegaBytes = nFiles * dim0 * dim1 * \
                          (mcaDim * bytefactor / (1024 * 1024.))
        physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone()
        if physicalMemory is None:
            # 6 Gigabytes should be a good compromise
            physicalMemory = 6000
        else:
            physicalMemory /= (1024 * 1024.)
        if (neededMegaBytes > (0.95 * physicalMemory)) \
           and (nFiles == 1) and (len(shape) == 3):
            if self.__dtype0 is None:
                if (bytefactor == 8) and \
                   (neededMegaBytes < (2 * physicalMemory)):
                    # try reading as float32
                    self.__dtype = numpy.float32
                else:
                    raise MemoryError("Force dynamic loading")
            else:
                raise MemoryError("Force dynamic loading")
        if (mcaIndex == 0) and (nFiles == 1) and (nScans == 1):
            # keep the original arrangement but in memory
            self.data = numpy.zeros(yDataset.shape, self.__dtype)
            considerAsImages = True
        else:
            # force arrangement as spectra
            self.data = numpy.zeros((dim0, dim1, mcaDim), self.__dtype)
        DONE = False
    except (MemoryError, ValueError):
        # some versions report ValueError instead of MemoryError
        if (nFiles == 1) and (len(shape) == 3):
            print("Attempting dynamic loading")
            self.data = yDataset
            if mSelection is not None:
                mDataset = tmpHdf[mpath].value
                self.monitor = [mDataset]
            if xSelection is not None:
                xDataset = tmpHdf[xpath].value
                self.x = [xDataset]
            if h5py.version.version < '2.0':
                # prevent automatic closing keeping a reference
                # to the open file
                self._fileReference = hdfStack
            DONE = True
        else:
            # what to do if the number of dimensions is only 2?
            raise
    if (not DONE) and (not considerAsImages):
        self.info["McaIndex"] = 2
        n = 0
        if dim0 == 1:
            self.onBegin(dim1)
        else:
            self.onBegin(dim0)
        self.incrProgressBar = 0
        for hdf in hdfStack._sourceObjectList:
            entryNames = list(hdf["/"].keys())
            goodEntryNames = []
            for entry in entryNames:
                tmpPath = "/" + entry
                if hasattr(hdf[tmpPath], "keys"):
                    goodEntryNames.append(entry)
            for scan in scanlist:
                if JUST_KEYS:
                    entryName = goodEntryNames[int(scan.split(".")[-1]) - 1]
                    path = entryName + ySelection
                    if mSelection is not None:
                        mpath = entryName + mSelection
                        mDataset = hdf[mpath].value
                    if xSelection is not None:
                        xpath = entryName + xSelection
                        xDataset = hdf[xpath].value
                else:
                    path = scan + ySelection
                    if mSelection is not None:
                        mpath = scan + mSelection
                        mDataset = hdf[mpath].value
                    if xSelection is not None:
                        xpath = scan + xSelection
                        xDataset = hdf[xpath].value
                try:
                    yDataset = hdf[path]
                    tmpShape = yDataset.shape
                    totalBytes = numpy.ones((1,), yDataset.dtype).itemsize
                    for nItems in tmpShape:
                        totalBytes *= nItems
                    if (totalBytes / (1024. * 1024.)) > 500:
                        # read from disk
                        IN_MEMORY = False
                    else:
                        # read the data into memory
                        yDataset = hdf[path].value
                        IN_MEMORY = True
                except (MemoryError, ValueError):
                    yDataset = hdf[path]
                    IN_MEMORY = False
                nMcaInYDataset = 1
                for dim in yDataset.shape:
                    nMcaInYDataset *= dim
                nMcaInYDataset = int(nMcaInYDataset / mcaDim)
                if mcaIndex != 0:
                    if IN_MEMORY:
                        yDataset.shape = -1, mcaDim
                    if mSelection is not None:
                        case = -1
                        nMonitorData = 1
                        for v in mDataset.shape:
                            nMonitorData *= v
                        if nMonitorData == nMcaInYDataset:
                            mDataset.shape = nMcaInYDataset
                            case = 0
                        elif nMonitorData == (nMcaInYDataset * mcaDim):
                            case = 1
                            mDataset.shape = nMcaInYDataset, mcaDim
                        if case == -1:
                            raise ValueError(
                                "I do not know how to handle this monitor data")
                    if (len(yDataset.shape) == 3) and \
                       (dim1 == yDataset.shape[1]):
                        mca = 0
                        deltaI = int(yDataset.shape[1] / dim1)
                        for ii in range(yDataset.shape[0]):
                            i = int(n / dim1)
                            yData = yDataset[ii:(ii + 1)]
                            yData.shape = -1, mcaDim
                            if mSelection is not None:
                                if case == 0:
                                    mData = numpy.outer(
                                            mDataset[mca:(mca + dim1)],
                                            numpy.ones((mcaDim)))
                                    self.data[i, :, :] = yData / mData
                                elif case == 1:
                                    mData = mDataset[mca:(mca + dim1), :]
                                    mData.shape = -1, mcaDim
                                    self.data[i, :, :] = yData / mData
                            else:
                                self.data[i:(i + deltaI), :] = yData
                            n += yDataset.shape[1]
                            mca += dim1
                    else:
                        for mca in range(nMcaInYDataset):
                            i = int(n / dim1)
                            j = n % dim1
                            if len(yDataset.shape) == 3:
                                ii = int(mca / yDataset.shape[1])
                                jj = mca % yDataset.shape[1]
                                yData = yDataset[ii, jj]
                            elif len(yDataset.shape) == 2:
                                yData = yDataset[mca, :]
                            elif len(yDataset.shape) == 1:
                                yData = yDataset
                            if mSelection is not None:
                                if case == 0:
                                    self.data[i, j, :] = yData / mDataset[mca]
                                elif case == 1:
                                    self.data[i, j, :] = yData / mDataset[mca, :]
                            else:
                                self.data[i, j, :] = yData
                            n += 1
                else:
                    if mSelection is not None:
                        case = -1
                        nMonitorData = 1
                        for v in mDataset.shape:
                            nMonitorData *= v
                        if nMonitorData == yDataset.shape[0]:
                            case = 3
                            mDataset.shape = yDataset.shape[0]
                        elif nMonitorData == nMcaInYDataset:
                            mDataset.shape = nMcaInYDataset
                            case = 0
                        #elif nMonitorData == (yDataset.shape[1] * yDataset.shape[2]):
                        #    case = 1
                        #    mDataset.shape = yDataset.shape[1], yDataset.shape[2]
                        if case == -1:
                            raise ValueError(
                                "I do not know how to handle this monitor data")
                    if IN_MEMORY:
                        yDataset.shape = mcaDim, -1
                    if len(yDataset.shape) != 3:
                        for mca in range(nMcaInYDataset):
                            i = int(n / dim1)
                            j = n % dim1
                            if len(yDataset.shape) == 3:
                                ii = int(mca / yDataset.shape[2])
                                jj = mca % yDataset.shape[2]
                                yData = yDataset[:, ii, jj]
                            elif len(yDataset.shape) == 2:
                                yData = yDataset[:, mca]
                            elif len(yDataset.shape) == 1:
                                yData = yDataset[:]
                            if mSelection is not None:
                                if case == 0:
                                    self.data[i, j, :] = yData / mDataset[mca]
                                elif case == 1:
                                    self.data[i, j, :] = yData / mDataset[:, mca]
                                elif case == 3:
                                    self.data[i, j, :] = yData / mDataset
                            else:
                                self.data[i, j, :] = yData
                            n += 1
                    else:
                        # stack of images to be read as MCA
                        for nImage in range(yDataset.shape[0]):
                            tmp = yDataset[nImage:(nImage + 1)]
                            if len(tmp.shape) == 3:
                                i = int(n / dim1)
                                j = n % dim1
                                if 0:
                                    # this loop is extremely SLOW!!! (and useless)
                                    for ii in range(tmp.shape[1]):
                                        for jj in range(tmp.shape[2]):
                                            self.data[i + ii, j + jj, nImage] = \
                                                tmp[0, ii, jj]
                                else:
                                    self.data[i:i + tmp.shape[1],
                                              j:j + tmp.shape[2],
                                              nImage] = tmp[0]
                        if mSelection is not None:
                            for mca in range(yDataset.shape[0]):
                                i = int(n / dim1)
                                j = n % dim1
                                yData = self.data[i, j, :]
                                if case == 0:
                                    self.data[i, j, :] = yData / mDataset[mca]
                                elif case == 1:
                                    self.data[i, j, :] = yData / mDataset[:, mca]
                                n += 1
                        else:
                            n += tmp.shape[1] * tmp.shape[2]
                if dim0 == 1:
                    self.onProgress(j)
            if dim0 != 1:
                self.onProgress(i)
        self.onEnd()
    elif not DONE:
        # data into memory but as images
        self.info["McaIndex"] = mcaIndex
        for hdf in hdfStack._sourceObjectList:
            entryNames = list(hdf["/"].keys())
            for scan in scanlist:
                if JUST_KEYS:
                    entryName = entryNames[int(scan.split(".")[-1]) - 1]
                    path = entryName + ySelection
                    if mSelection is not None:
                        mpath = entryName + mSelection
                        mDataset = hdf[mpath].value
                    if xSelection is not None:
                        xpath = entryName + xSelection
                        xDataset = hdf[xpath].value
                else:
                    path = scan + ySelection
                    if mSelection is not None:
                        mpath = scan + mSelection
                        mDataset = hdf[mpath].value
                    if xSelection is not None:
                        xpath = scan + xSelection
                        xDataset = hdf[xpath].value
                if mSelection is not None:
                    nMonitorData = mDataset.size
                    case = -1
                    yDatasetShape = yDataset.shape
                    if nMonitorData == yDatasetShape[0]:
                        # as many monitor data as images
                        mDataset.shape = yDatasetShape[0]
                        case = 0
                    elif nMonitorData == (yDatasetShape[1] * yDatasetShape[2]):
                        # as many monitor data as pixels
                        case = 1
                        mDataset.shape = yDatasetShape[1], yDatasetShape[2]
                    if case == -1:
                        raise ValueError(
                            "I do not know how to handle this monitor data")
                    if case == 0:
                        for i in range(yDatasetShape[0]):
                            self.data[i] = yDataset[i] / mDataset[i]
                    elif case == 1:
                        for i in range(yDataset.shape[0]):
                            self.data[i] = yDataset[i] / mDataset
                else:
                    for i in range(yDataset.shape[0]):
                        self.data[i:i + 1] = yDataset[i:i + 1]
    else:
        self.info["McaIndex"] = mcaIndex
    self.info["SourceType"] = SOURCE_TYPE
    self.info["SourceName"] = filelist
    self.info["Size"] = 1
    self.info["NumberOfFiles"] = 1
    if mcaIndex == 0:
        self.info["FileIndex"] = 1
    else:
        self.info["FileIndex"] = 0
    self.info['McaCalib'] = [0.0, 1.0, 0.0]
    self.info['Channel0'] = 0
    shape = self.data.shape
    for i in range(len(shape)):
        key = 'Dim_%d' % (i + 1,)
        self.info[key] = shape[i]
    if xSelection is not None:
        if xDataset.size == shape[self.info['McaIndex']]:
            self.x = [xDataset.reshape(-1)]
        else:
            print("Ignoring xSelection")
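
# Illustrative note as code (assumptions: files whose single entry contains
# the counts dataset; names are hypothetical). When USE_JUST_KEYS applies,
# scans are addressed by position with SpecFile-like keys ("1.<position>"),
# so the n-th entry of every file is used whatever its name:
#
#     selection = {'x': None, 'y': "/data/counts", 'm': None}
#     stack.loadFileList(["soleil_0001.nxs", "soleil_0002.nxs"], selection)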
def loadFileList(self, filelist, selection, scanlist=None): """ loadFileList(self, filelist, y, scanlist=None, monitor=None, x=None) filelist is the list of file names belonging to the stack selection is a dictionary with the keys x, y, m. x is the path to the x data (the channels) in the spectrum, without the first level "directory". It is unused (for now). y is the path to the 1D data (the counts) in the spectrum, without the first level "directory" m is the path to the normalizing data (I0 or whatever) without the first level "directory". scanlist is the list of first level "directories" containing the 1D data Example: The actual path has the form: /whatever1/whatever2/counts That means scanlist = ["/whatever1"] and selection['y'] = "/whatever2/counts" """ _logger.debug("filelist = %s", filelist) _logger.debug("selection = %s", selection) _logger.debug("scanlist = %s", scanlist) # all the files in the same source hdfStack = NexusDataSource.NexusDataSource(filelist) # if there is more than one file, it is assumed all the files have # the same structure. tmpHdf = hdfStack._sourceObjectList[0] entryNames = [] for key in tmpHdf["/"].keys(): if isinstance(tmpHdf["/"+key], h5py.Group): entryNames.append(key) # built the selection in terms of HDF terms # for the time being, only the first item in x selection used xSelection = selection.get('x', None) if xSelection is not None: if type(xSelection) != type([]): xSelection = [xSelection] if type(xSelection) == type([]): if len(xSelection): xSelection = xSelection[0] else: xSelection = None else: xSelection = None # only one y is taken ySelection = selection['y'] if type(ySelection) == type([]): ySelectionList = list(ySelection) ySelection = ySelection[0] else: ySelectionList = [ySelection] # monitor selection mSelection = selection.get('m', None) if mSelection not in [None, []]: if type(mSelection) != type([]): mSelection = [mSelection] if type(mSelection) == type([]): if len(mSelection): mSelection = mSelection[0] else: mSelection = None else: mSelection = None USE_JUST_KEYS = False # deal with the pathological case where the scanlist corresponds # to a selected top level dataset if len(entryNames) == 0: if scanlist is not None: if (ySelection in scanlist) or \ (xSelection in scanlist) or \ (mSelection in scanlist): scanlist = None USE_JUST_KEYS = True else: USE_JUST_KEYS = True elif len(entryNames) == 1: # deal with the SOLEIL case of one entry but with different name # in different files USE_JUST_KEYS = True elif scanlist in [None, []]: USE_JUST_KEYS = True if USE_JUST_KEYS: # if the scanlist is None, it is assumed we are interested on all # the scans containing the selection, not that all the scans # contain the selection. scanlist = [] if 0: JUST_KEYS = False #expect same entry names in the files #Unfortunately this does not work for SOLEIL for entry in entryNames: path = "/" + entry + ySelection dirname = posixpath.dirname(path) base = posixpath.basename(path) try: file_entry = tmpHdf[dirname] if base in file_entry.keys(): scanlist.append(entry) except: pass else: JUST_KEYS = True #expect same structure in the files even if the #names are different (SOLEIL ...) 
if len(entryNames): i = 0 for entry in entryNames: i += 1 path = "/" + entry + ySelection dirname = posixpath.dirname(path) base = posixpath.basename(path) try: file_entry = tmpHdf[dirname] if hasattr(file_entry, "keys"): if base in file_entry.keys(): # this is the case of a selection inside a group scanlist.append("1.%d" % i) except KeyError: _logger.warning("%s not in file, ignoring.", dirname) if not len(scanlist): if not ySelection.startswith("/"): path = "/" + ySelection else: path = ySelection dirname = posixpath.dirname(path) base = posixpath.basename(path) try: if dirname in tmpHdf["/"]: # this is the case of a dataset at top plevel # or having given the complete path if base in tmpHdf[dirname]: JUST_KEYS = False scanlist.append("") elif base in file_entry.keys(): JUST_KEYS = False scanlist.append("") except: #it will crash later on pass else: JUST_KEYS = False scanlist.append("") else: try: number, order = [int(x) for x in scanlist[0].split(".")] JUST_KEYS = True except: JUST_KEYS = False if not JUST_KEYS: for scan in scanlist: if scan.startswith("/"): t = scan[1:] else: t = scan if t not in entryNames: raise ValueError("Entry %s not in file" % scan) nFiles = len(filelist) nScans = len(scanlist) if JUST_KEYS: if not nScans: raise IOError("No entry contains the required data") _logger.debug("Retained number of files = %d", nFiles) _logger.debug("Retained number of scans = %d", nScans) # Now is to decide the number of mca ... # I assume all the scans contain the same number of mca if JUST_KEYS: path = "/" + entryNames[int(scanlist[0].split(".")[-1])-1] + ySelection if mSelection is not None: mpath = "/" + entryNames[int(scanlist[0].split(".")[-1])-1] + mSelection if xSelection is not None: xpath = "/" + entryNames[int(scanlist[0].split(".")[-1])-1] + xSelection else: path = scanlist[0] + ySelection if mSelection is not None: mpath = scanlist[0] + mSelection if xSelection is not None: xpath = scanlist[0] + xSelection yDataset = tmpHdf[path] if (self.__dtype is None) or (mSelection is not None): self.__dtype = yDataset.dtype if self.__dtype in [numpy.int16, numpy.uint16]: self.__dtype = numpy.float32 elif self.__dtype in [numpy.int32, numpy.uint32]: if mSelection: self.__dtype = numpy.float32 else: self.__dtype = numpy.float64 elif self.__dtype not in [numpy.float16, numpy.float32, numpy.float64]: # Some datasets form CLS (origin APS?) 
arrive as data format # equal to ">u2" and are not triggered as integer types _logger.debug("Not basic dataset type %s", self.__dtype) if ("%s" % self.__dtype).endswith("2"): self.__dtype = numpy.float32 else: if mSelection: self.__dtype = numpy.float32 else: self.__dtype = numpy.float64 # figure out the shape of the stack shape = yDataset.shape mcaIndex = selection.get('index', len(shape)-1) if mcaIndex == -1: mcaIndex = len(shape) - 1 _logger.debug("mcaIndex = %d", mcaIndex) considerAsImages = False dim0, dim1, mcaDim = self.getDimensions(nFiles, nScans, shape, index=mcaIndex) try: if self.__dtype in [numpy.float32, numpy.int32]: bytefactor = 4 elif self.__dtype in [numpy.int16, numpy.uint16]: bytefactor = 2 elif self.__dtype in [numpy.int8, numpy.uint8]: bytefactor = 1 else: bytefactor = 8 neededMegaBytes = nFiles * dim0 * dim1 * (mcaDim * bytefactor/(1024*1024.)) _logger.info("Using %d bytes per item" % bytefactor) _logger.info("Needed %d Megabytes" % neededMegaBytes) physicalMemory = None if hasattr(PhysicalMemory, "getAvailablePhysicalMemoryOrNone"): physicalMemory = PhysicalMemory.getAvailablePhysicalMemoryOrNone() if not physicalMemory: physicalMemory = PhysicalMemory.getPhysicalMemoryOrNone() else: _logger.info("Available physical memory %.1f GBytes" % \ (physicalMemory/(1024*1024*1024.))) if physicalMemory is None: # 6 Gigabytes of available memory # should be a good compromise in 2018 physicalMemory = 6000 _logger.info("Assumed physical memory %.1f MBytes" % physicalMemory) else: physicalMemory /= (1024*1024.) _logger.info("Using physical memory %.1f GBytes" % (physicalMemory/1024)) if (neededMegaBytes > (0.95*physicalMemory))\ and (nFiles == 1) and (len(shape) == 3): if self.__dtype0 is None: if (bytefactor == 8) and (neededMegaBytes < (2*physicalMemory)): # try reading as float32 print("Forcing the use of float32 data") self.__dtype = numpy.float32 else: raise MemoryError("Force dynamic loading") else: raise MemoryError("Force dynamic loading") if (mcaIndex == 0) and ( nFiles == 1) and (nScans == 1): #keep the original arrangement but in memory self.data = numpy.zeros(yDataset.shape, self.__dtype) considerAsImages = True else: # force arrangement as spectra self.data = numpy.zeros((dim0, dim1, mcaDim), self.__dtype) DONE = False except (MemoryError, ValueError): # some versions report ValueError instead of MemoryError if (nFiles == 1) and (len(shape) == 3): _logger.warning("Attempting dynamic loading") if mSelection is not None: _logger.warning("Ignoring monitor") self.data = yDataset if mSelection is not None: mdtype = tmpHdf[mpath].dtype if mdtype not in [numpy.float64, numpy.float32]: mdtype = numpy.float64 mDataset = numpy.asarray(tmpHdf[mpath], dtype=mdtype) self.monitor = [mDataset] if xSelection is not None: xDataset = tmpHdf[xpath][()] self.x = [xDataset] if h5py.version.version < '2.0': #prevent automatic closing keeping a reference #to the open file self._fileReference = hdfStack DONE = True else: # what to do if the number of dimensions is only 2? 
raise # get the mca information associated to the path mcaObjectPaths = NexusTools.getMcaObjectPaths(tmpHdf, path) _time = None _calibration = None _channels = None if considerAsImages: self._pathHasRelevantInfo = False else: if len(list(mcaObjectPaths.keys())) > 1: # not just "counts" self._pathHasRelevantInfo = True if "live_time" in mcaObjectPaths: if DONE: # hopefully it will fit into memory if mcaObjectPaths["live_time"] in tmpHdf: _time = tmpHdf[mcaObjectPaths["live_time"]][()] elif "::" in mcaObjectPaths["live_time"]: tmpFileName, tmpDatasetPath = \ mcaObjectPaths["live_time"].split("::") with h5py.File(tmpFileName, "r") as tmpH5: _time = tmpH5[tmpDatasetPath][()] else: del mcaObjectPaths["live_time"] else: # we have to have as many live times as MCA spectra _time = numpy.zeros( \ (self.data.shape[0] * self.data.shape[1]), dtype=numpy.float64) elif "elapsed_time" in mcaObjectPaths: if DONE: # hopefully it will fit into memory if mcaObjectPaths["elapsed_time"] in tmpHdf: _time = \ tmpHdf[mcaObjectPaths["elapsed_time"]][()] elif "::" in mcaObjectPaths["elapsed_time"]: tmpFileName, tmpDatasetPath = \ mcaObjectPaths["elapsed_time"].split("::") with h5py.File(tmpFileName, "r") as tmpH5: _time = tmpH5[tmpDatasetPath][()] else: del mcaObjectPaths["elapsed_time"] else: # we have to have as many elpased times as MCA spectra _time = numpy.zeros((self.data.shape[0] * self.data.shape[1]), numpy.float32) if "calibration" in mcaObjectPaths: if mcaObjectPaths["calibration"] in tmpHdf: _calibration = \ tmpHdf[mcaObjectPaths["calibration"]][()] elif "::" in mcaObjectPaths["calibration"]: tmpFileName, tmpDatasetPath = \ mcaObjectPaths["calibration"].split("::") with h5py.File(tmpFileName, "r") as tmpH5: _calibration = tmpH5[tmpDatasetPath][()] else: del mcaObjectPaths["calibration"] if "channels" in mcaObjectPaths: if mcaObjectPaths["channels"] in tmpHdf: _channels = \ tmpHdf[mcaObjectPaths["channels"]][()] elif "::" in mcaObjectPaths["channels"]: tmpFileName, tmpDatasetPath = \ mcaObjectPaths["channels"].split("::") with h5py.File(tmpFileName, "r") as tmpH5: _channels = tmpH5[tmpDatasetPath][()] else: del mcaObjectPaths["channels"] else: self._pathHasRelevantInfo = False if (not DONE) and (not considerAsImages): _logger.info("Data in memory as spectra") self.info["McaIndex"] = 2 n = 0 if dim0 == 1: self.onBegin(dim1) else: self.onBegin(dim0) self.incrProgressBar=0 for hdf in hdfStack._sourceObjectList: entryNames = list(hdf["/"].keys()) goodEntryNames = [] for entry in entryNames: tmpPath = "/" + entry if hasattr(hdf[tmpPath], "keys"): goodEntryNames.append(entry) for scan in scanlist: IN_MEMORY = None nStart = n for ySelection in ySelectionList: n = nStart if JUST_KEYS: entryName = goodEntryNames[int(scan.split(".")[-1])-1] path = entryName + ySelection if mSelection is not None: mpath = entryName + mSelection mdtype = hdf[mpath].dtype if mdtype not in [numpy.float64, numpy.float32]: mdtype = numpy.float64 mDataset = numpy.asarray(hdf[mpath], dtype=mdtype) if xSelection is not None: xpath = entryName + xSelection xDataset = hdf[xpath][()] else: path = scan + ySelection if mSelection is not None: mpath = scan + mSelection mdtype = hdf[mpath].dtype if mdtype not in [numpy.float64, numpy.float32]: mdtype = numpy.float64 mDataset = numpy.asarray(hdf[mpath], dtype=mdtype) if xSelection is not None: xpath = scan + xSelection xDataset = hdf[xpath][()] try: yDataset = hdf[path] tmpShape = yDataset.shape totalBytes = numpy.ones((1,), yDataset.dtype).itemsize for nItems in tmpShape: totalBytes *= nItems # 
should one be conservative or just try? if (totalBytes/(1024.*1024.)) > (0.4 * physicalMemory): _logger.info("Force dynamic loading of spectra") #read from disk IN_MEMORY = False else: #read the data into memory _logger.info("Attempt to load whole map into memory") yDataset = hdf[path][()] IN_MEMORY = True except (MemoryError, ValueError): _logger.info("Dynamic loading of spectra") yDataset = hdf[path] IN_MEMORY = False nMcaInYDataset = 1 for dim in yDataset.shape: nMcaInYDataset *= dim nMcaInYDataset = int(nMcaInYDataset/mcaDim) timeData = None if _time is not None: if "live_time" in mcaObjectPaths: # it is assumed that all have the same structure!!! timePath = NexusTools.getMcaObjectPaths(hdf, path)["live_time"] elif "elapsed_time" in mcaObjectPaths: timePath = NexusTools.getMcaObjectPaths(hdf, path)["elapsed_time"] if timePath in hdf: timeData = hdf[timePath][()] elif "::" in timePath: externalFile, externalPath = timePath.split("::") with h5py.File(externalFile, "r") as timeHdf: timeData = timeHdf[externalPath][()] if mcaIndex != 0: if IN_MEMORY: yDataset.shape = -1, mcaDim if mSelection is not None: case = -1 nMonitorData = 1 for v in mDataset.shape: nMonitorData *= v if nMonitorData == nMcaInYDataset: mDataset.shape = nMcaInYDataset case = 0 elif nMonitorData == (nMcaInYDataset * mcaDim): case = 1 mDataset.shape = nMcaInYDataset, mcaDim if case == -1: raise ValueError(\ "I do not know how to handle this monitor data") if timeData is not None: case = -1 nTimeData = 1 for v in timeData.shape: nTimeData *= v if nTimeData == nMcaInYDataset: timeData.shape = nMcaInYDataset case = 0 _time[nStart: nStart + nMcaInYDataset] += timeData if case == -1: _logger.warning("I do not know how to handle this time data") _logger.warning("Ignoring time information") _time= None if (len(yDataset.shape) == 3) and\ (dim1 == yDataset.shape[1]): mca = 0 deltaI = int(yDataset.shape[1]/dim1) for ii in range(yDataset.shape[0]): i = int(n/dim1) yData = yDataset[ii:(ii+1)] yData.shape = -1, mcaDim if mSelection is not None: if case == 0: mData = numpy.outer(mDataset[mca:(mca+dim1)], numpy.ones((mcaDim))) self.data[i, :, :] += yData / mData elif case == 1: mData = mDataset[mca:(mca+dim1), :] mData.shape = -1, mcaDim self.data[i, :, :] += yData / mData else: self.data[i:(i+deltaI), :] += yData n += yDataset.shape[1] mca += dim1 else: for mca in range(nMcaInYDataset): i = int(n/dim1) j = n % dim1 if len(yDataset.shape) == 3: ii = int(mca/yDataset.shape[1]) jj = mca % yDataset.shape[1] yData = yDataset[ii, jj] elif len(yDataset.shape) == 2: yData = yDataset[mca,:] elif len(yDataset.shape) == 1: yData = yDataset if mSelection is not None: if case == 0: self.data[i, j, :] += yData / mDataset[mca] elif case == 1: self.data[i, j, :] += yData / mDataset[mca, :] else: self.data[i, j, :] += yData n += 1 else: if mSelection is not None: case = -1 nMonitorData = 1 for v in mDataset.shape: nMonitorData *= v if nMonitorData == yDataset.shape[0]: case = 3 mDataset.shape = yDataset.shape[0] elif nMonitorData == nMcaInYDataset: mDataset.shape = nMcaInYDataset case = 0 #elif nMonitorData == (yDataset.shape[1] * yDataset.shape[2]): # case = 1 # mDataset.shape = yDataset.shape[1], yDataset.shape[2] if case == -1: raise ValueError(\ "I do not know how to handle this monitor data") if IN_MEMORY: yDataset.shape = mcaDim, -1 if len(yDataset.shape) != 3: for mca in range(nMcaInYDataset): i = int(n/dim1) j = n % dim1 if len(yDataset.shape) == 3: ii = int(mca/yDataset.shape[2]) jj = mca % yDataset.shape[2] yData = yDataset[:, ii, jj] elif 
                        if mcaIndex != 0:
                            if IN_MEMORY:
                                yDataset.shape = -1, mcaDim
                            if mSelection is not None:
                                case = -1
                                nMonitorData = 1
                                for v in mDataset.shape:
                                    nMonitorData *= v
                                if nMonitorData == nMcaInYDataset:
                                    mDataset.shape = nMcaInYDataset
                                    case = 0
                                elif nMonitorData == \
                                        (nMcaInYDataset * mcaDim):
                                    case = 1
                                    mDataset.shape = nMcaInYDataset, mcaDim
                                if case == -1:
                                    raise ValueError(
                                        "I do not know how to handle this monitor data")
                            if timeData is not None:
                                case = -1
                                nTimeData = 1
                                for v in timeData.shape:
                                    nTimeData *= v
                                if nTimeData == nMcaInYDataset:
                                    timeData.shape = nMcaInYDataset
                                    case = 0
                                    _time[nStart: nStart + nMcaInYDataset] += \
                                        timeData
                                if case == -1:
                                    _logger.warning(
                                        "I do not know how to handle this time data")
                                    _logger.warning("Ignoring time information")
                                    _time = None
                            if (len(yDataset.shape) == 3) and \
                               (dim1 == yDataset.shape[1]):
                                mca = 0
                                deltaI = int(yDataset.shape[1] / dim1)
                                for ii in range(yDataset.shape[0]):
                                    i = int(n / dim1)
                                    yData = yDataset[ii:(ii + 1)]
                                    yData.shape = -1, mcaDim
                                    if mSelection is not None:
                                        if case == 0:
                                            mData = numpy.outer(
                                                mDataset[mca:(mca + dim1)],
                                                numpy.ones((mcaDim)))
                                            self.data[i, :, :] += yData / mData
                                        elif case == 1:
                                            mData = \
                                                mDataset[mca:(mca + dim1), :]
                                            mData.shape = -1, mcaDim
                                            self.data[i, :, :] += yData / mData
                                    else:
                                        self.data[i:(i + deltaI), :] += yData
                                    n += yDataset.shape[1]
                                    mca += dim1
                            else:
                                for mca in range(nMcaInYDataset):
                                    i = int(n / dim1)
                                    j = n % dim1
                                    if len(yDataset.shape) == 3:
                                        ii = int(mca / yDataset.shape[1])
                                        jj = mca % yDataset.shape[1]
                                        yData = yDataset[ii, jj]
                                    elif len(yDataset.shape) == 2:
                                        yData = yDataset[mca, :]
                                    elif len(yDataset.shape) == 1:
                                        yData = yDataset
                                    if mSelection is not None:
                                        if case == 0:
                                            self.data[i, j, :] += \
                                                yData / mDataset[mca]
                                        elif case == 1:
                                            self.data[i, j, :] += \
                                                yData / mDataset[mca, :]
                                    else:
                                        self.data[i, j, :] += yData
                                    n += 1
                        else:
                            if mSelection is not None:
                                case = -1
                                nMonitorData = 1
                                for v in mDataset.shape:
                                    nMonitorData *= v
                                if nMonitorData == yDataset.shape[0]:
                                    case = 3
                                    mDataset.shape = yDataset.shape[0]
                                elif nMonitorData == nMcaInYDataset:
                                    mDataset.shape = nMcaInYDataset
                                    case = 0
                                #elif nMonitorData == (yDataset.shape[1] * yDataset.shape[2]):
                                #    case = 1
                                #    mDataset.shape = yDataset.shape[1], yDataset.shape[2]
                                if case == -1:
                                    raise ValueError(
                                        "I do not know how to handle this monitor data")
                            if IN_MEMORY:
                                yDataset.shape = mcaDim, -1
                            if len(yDataset.shape) != 3:
                                for mca in range(nMcaInYDataset):
                                    i = int(n / dim1)
                                    j = n % dim1
                                    if len(yDataset.shape) == 3:
                                        ii = int(mca / yDataset.shape[2])
                                        jj = mca % yDataset.shape[2]
                                        yData = yDataset[:, ii, jj]
                                    elif len(yDataset.shape) == 2:
                                        yData = yDataset[:, mca]
                                    elif len(yDataset.shape) == 1:
                                        yData = yDataset[:]
                                    if mSelection is not None:
                                        if case == 0:
                                            self.data[i, j, :] += \
                                                yData / mDataset[mca]
                                        elif case == 1:
                                            self.data[i, j, :] += \
                                                yData / mDataset[:, mca]
                                        elif case == 3:
                                            self.data[i, j, :] += \
                                                yData / mDataset
                                    else:
                                        self.data[i, j, :] += yData
                                    n += 1
                            else:
                                # stack of images to be read as MCA
                                for nImage in range(yDataset.shape[0]):
                                    tmp = yDataset[nImage:(nImage + 1)]
                                    if len(tmp.shape) == 3:
                                        i = int(n / dim1)
                                        j = n % dim1
                                        # the element-by-element double loop
                                        # formerly kept here under "if 0:"
                                        # was extremely slow (and useless)
                                        self.data[i:i + tmp.shape[1],
                                                  j:j + tmp.shape[2],
                                                  nImage] += tmp[0]
                                if mSelection is not None:
                                    for mca in range(yDataset.shape[0]):
                                        i = int(n / dim1)
                                        j = n % dim1
                                        yData = self.data[i, j, :]
                                        if case == 0:
                                            self.data[i, j, :] += \
                                                yData / mDataset[mca]
                                        elif case == 1:
                                            self.data[i, j, :] += \
                                                yData / mDataset[:, mca]
                                        n += 1
                                else:
                                    n += tmp.shape[1] * tmp.shape[2]
                        yDataset = None
                    if dim0 == 1:
                        self.onProgress(j)
                if dim0 != 1:
                    self.onProgress(i)
            self.onEnd()
        elif not DONE:
            # data into memory but as images
            self.info["McaIndex"] = mcaIndex
            for hdf in hdfStack._sourceObjectList:
                entryNames = list(hdf["/"].keys())
                for scan in scanlist:
                    for ySelection in ySelectionList:
                        if JUST_KEYS:
                            entryName = \
                                entryNames[int(scan.split(".")[-1]) - 1]
                            path = entryName + ySelection
                            if mSelection is not None:
                                mpath = entryName + mSelection
                                # the original was truncated here (a bare
                                # "mDataset.shape" statement); this mirrors
                                # the else branch below
                                mdtype = hdf[mpath].dtype
                                if mdtype not in [numpy.float64,
                                                  numpy.float32]:
                                    mdtype = numpy.float64
                                mDataset = numpy.asarray(hdf[mpath],
                                                         dtype=mdtype)
                            if xSelection is not None:
                                xpath = entryName + xSelection
                                xDataset = hdf[xpath][()]
                        else:
                            path = scan + ySelection
                            if mSelection is not None:
                                mpath = scan + mSelection
                                mdtype = hdf[mpath].dtype
                                if mdtype not in [numpy.float64,
                                                  numpy.float32]:
                                    mdtype = numpy.float64
                                mDataset = numpy.asarray(hdf[mpath],
                                                         dtype=mdtype)
                            if xSelection is not None:
                                xpath = scan + xSelection
                                xDataset = hdf[xpath][()]
                        # yDataset here is still the dataset opened on the
                        # first file during the allocation phase
                        # (considerAsImages implies a single file and scan)
                        if mSelection is not None:
                            nMonitorData = mDataset.size
                            case = -1
                            yDatasetShape = yDataset.shape
                            if nMonitorData == yDatasetShape[0]:
                                # as many monitor data as images
                                mDataset.shape = yDatasetShape[0]
                                case = 0
                            elif nMonitorData == \
                                    (yDatasetShape[1] * yDatasetShape[2]):
                                # as many monitor data as pixels
                                case = 1
                                mDataset.shape = yDatasetShape[1], \
                                                 yDatasetShape[2]
                            if case == -1:
                                raise ValueError(
                                    "I do not know how to handle this monitor data")
                            if case == 0:
                                for i in range(yDatasetShape[0]):
                                    self.data[i] += \
                                        yDataset[i][()] / mDataset[i]
                            elif case == 1:
                                for i in range(yDataset.shape[0]):
                                    self.data[i] += yDataset[i] / mDataset
                        else:
                            for i in range(yDataset.shape[0]):
                                self.data[i:i + 1] += yDataset[i:i + 1]
        else:
            self.info["McaIndex"] = mcaIndex
        # testing a numpy array for truth would raise, therefore
        # compare against None
        if _time is not None:
            nRequiredValues = 1
            for i in range(len(self.data.shape)):
                if i != mcaIndex:
                    nRequiredValues *= self.data.shape[i]
            if _time.size != nRequiredValues:
                _logger.warning(
                    "I do not know how to interpret the time information")
                _logger.warning("Ignoring time information")
                _time = None
            else:
                _time.shape = -1
        self.info["SourceType"] = SOURCE_TYPE
        self.info["SourceName"] = filelist
        self.info["Size"] = 1
        self.info["NumberOfFiles"] = 1
        if mcaIndex == 0:
            self.info["FileIndex"] = 1
        else:
            self.info["FileIndex"] = 0
        if _calibration is not None:
            self.info['McaCalib'] = _calibration
        else:
            self.info['McaCalib'] = [0.0, 1.0, 0.0]
        shape = self.data.shape
        for i in range(len(shape)):
            key = 'Dim_%d' % (i + 1,)
            self.info[key] = shape[i]
        self.info['Channel0'] = 0
        if xSelection is not None:
            if xDataset.size == shape[self.info['McaIndex']]:
                self.x = [xDataset.reshape(-1)]
            else:
                _logger.warning("Ignoring xSelection")
        elif _channels is not None:
            _channels.shape = -1
            self.x = [_channels]
        if _time is not None:
            self.info["McaLiveTime"] = _time
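    # Hedged usage sketch (editor's addition; "stack" stands for an
    # instance of the surrounding class and the dataset path is purely
    # illustrative):
    #
    #     selection = {'x': [],
    #                  'y': "/measurement/mca_0/counts",
    #                  'm': []}
    #     stack.loadFileList(["map_0001.h5", "map_0002.h5"],
    #                        selection, scanlist=None)
    #     stack.data.shape    # (dim0, dim1, mcaDim) unless kept as images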
    def loadFileList(self, filelist, fileindex=0):
        if type(filelist) == type(''):
            filelist = [filelist]
        self.__keyList = []
        self.sourceName = filelist
        self.__indexedStack = True
        self.sourceType = SOURCE_TYPE
        self.info = {}
        self.nbFiles = len(filelist)
        # read the first edf file to get the information
        tempEdf = EdfFileDataSource.EdfFileDataSource(filelist[0])
        keylist = tempEdf.getSourceInfo()['KeyList']
        nImages = len(keylist)
        dataObject = tempEdf.getDataObject(keylist[0])
        self.info.update(dataObject.info)
        if len(dataObject.data.shape) == 3:
            # this is already a stack
            self.data = dataObject.data
            self.__nFiles = 1
            self.__nImagesPerFile = nImages
            shape = self.data.shape
            for i in range(len(shape)):
                key = 'Dim_%d' % (i + 1,)
                self.info[key] = shape[i]
            self.info["SourceType"] = SOURCE_TYPE
            self.info["SourceName"] = filelist[0]
            self.info["Size"] = 1
            self.info["NumberOfFiles"] = 1
            self.info["FileIndex"] = fileindex
            return
        arrRet = dataObject.data
        if self.__dtype is None:
            self.__dtype = arrRet.dtype
        self.onBegin(self.nbFiles)
        singleImageShape = arrRet.shape
        actualImageStack = False
        if (fileindex == 2) or (self.__imageStack):
            self.__imageStack = True
            if len(singleImageShape) == 1:
                # single line
                # be ready for specfile stack?
                self.onEnd()
                raise IOError("Not implemented yet")
                # NOTE: the block below is unreachable after the raise
                # above; kept as in the original
                self.data = numpy.zeros((arrRet.shape[0],
                                         nImages,
                                         self.nbFiles),
                                        self.__dtype)
                self.incrProgressBar = 0
                for tempEdfFileName in filelist:
                    tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                    for i in range(nImages):
                        pieceOfStack = tempEdf.GetData(i)
                        self.data[:, i, self.incrProgressBar] = \
                            pieceOfStack[:]
                    self.incrProgressBar += 1
                    self.onProgress(self.incrProgressBar)
                self.onEnd()
            else:
                if nImages > 1:
                    # this is not the common case
                    # should I try to convert it to a standard one
                    # using a 3D matrix or keep it as a 4D matrix?
                    if self.nbFiles > 1:
                        raise IOError(
                            "Multiple files with multiple images not implemented yet")
                    self.data = numpy.zeros((arrRet.shape[0],
                                             arrRet.shape[1],
                                             nImages * self.nbFiles),
                                            self.__dtype)
                    self.incrProgressBar = 0
                    for tempEdfFileName in filelist:
                        tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                        for i in range(nImages):
                            pieceOfStack = tempEdf.GetData(i)
                            self.data[:, :,
                                      nImages * self.incrProgressBar + i] = \
                                pieceOfStack[:, :]
                        self.incrProgressBar += 1
                else:
                    # this is the common case
                    try:
                        # estimate the needed memory in bytes
                        if self.__dtype == numpy.float64:
                            bytefactor = 8
                        else:
                            bytefactor = 4
                        needed_ = self.nbFiles * \
                                  arrRet.shape[0] * \
                                  arrRet.shape[1] * bytefactor
                        physicalMemory = \
                            PhysicalMemory.getPhysicalMemoryOrNone()
                        if physicalMemory is not None:
                            # spare 5% of memory
                            if physicalMemory < (1.05 * needed_):
                                raise MemoryError(
                                    "Not enough physical memory available")
                        if self.__imageStack:
                            self.data = numpy.zeros((self.nbFiles,
                                                     arrRet.shape[0],
                                                     arrRet.shape[1]),
                                                    self.__dtype)
                            self.incrProgressBar = 0
                            for tempEdfFileName in filelist:
                                tempEdf = EdfFile.EdfFile(tempEdfFileName,
                                                          'rb')
                                pieceOfStack = tempEdf.GetData(0)
                                self.data[self.incrProgressBar] = \
                                    pieceOfStack
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                            actualImageStack = True
                        else:
                            self.data = numpy.zeros((arrRet.shape[0],
                                                     arrRet.shape[1],
                                                     self.nbFiles),
                                                    self.__dtype)
                            self.incrProgressBar = 0
                            for tempEdfFileName in filelist:
                                tempEdf = EdfFile.EdfFile(tempEdfFileName,
                                                          'rb')
                                pieceOfStack = tempEdf.GetData(0)
                                self.data[:, :, self.incrProgressBar] = \
                                    pieceOfStack
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                    except (MemoryError, ValueError):
                        hdf5done = False
                        if HDF5 and (('PyMcaQt' in sys.modules) or
                                     ('PyMca.PyMcaQt' in sys.modules)):
                            from PyMca5 import PyMcaQt as qt
                            from PyMca5 import ArraySave
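                            # Editor's note: the dialog below offers to
                            # stream the stack into an HDF5 file on disk;
                            # ArraySave.getHDF5FileInstanceAndBuffer returns
                            # an open file object plus a disk-backed buffer
                            # that replaces the in-memory numpy array as
                            # self.data.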
                            msg = qt.QMessageBox.information(
                                    None,
                                    "Memory error\n",
                                    "Do you want to convert your data to HDF5?\n",
                                    qt.QMessageBox.Yes,
                                    qt.QMessageBox.No)
                            if msg != qt.QMessageBox.No:
                                hdf5file = qt.QFileDialog.getSaveFileName(
                                        None,
                                        "Please select output file name",
                                        os.path.dirname(filelist[0]),
                                        "HDF5 files *.h5")
                                if not len(hdf5file):
                                    raise IOError("Invalid output file")
                                hdf5file = qt.safe_str(hdf5file)
                                if not hdf5file.endswith(".h5"):
                                    hdf5file += ".h5"
                                hdf, self.data = \
                                    ArraySave.getHDF5FileInstanceAndBuffer(
                                        hdf5file,
                                        (self.nbFiles,
                                         arrRet.shape[0],
                                         arrRet.shape[1]))
                                self.incrProgressBar = 0
                                for tempEdfFileName in filelist:
                                    tempEdf = EdfFile.EdfFile(tempEdfFileName,
                                                              'rb')
                                    pieceOfStack = tempEdf.GetData(0)
                                    self.data[self.incrProgressBar, :, :] = \
                                        pieceOfStack[:, :]
                                    hdf.flush()
                                    self.incrProgressBar += 1
                                    self.onProgress(self.incrProgressBar)
                                hdf5done = True
                        if not hdf5done:
                            for i in range(3):
                                print("\7")
                            samplingStep = None
                            i = 2
                            while samplingStep is None:
                                print("**************************************************")
                                print(" Memory error!, attempting %dx%d sampling reduction " % (i, i))
                                print("**************************************************")
                                s1, s2 = arrRet[::i, ::i].shape
                                try:
                                    self.data = numpy.zeros((s1, s2,
                                                             self.nbFiles),
                                                            self.__dtype)
                                    samplingStep = i
                                except (MemoryError, ValueError):
                                    i += 1
                            self.incrProgressBar = 0
                            for tempEdfFileName in filelist:
                                tempEdf = EdfFile.EdfFile(tempEdfFileName,
                                                          'rb')
                                pieceOfStack = tempEdf.GetData(0)
                                self.data[:, :, self.incrProgressBar] = \
                                    pieceOfStack[::samplingStep,
                                                 ::samplingStep]
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                    self.onEnd()
        else:
            self.__imageStack = False
            if len(singleImageShape) == 1:
                # single line
                # be ready for specfile stack?
                raise IOError("Not implemented yet")
                # NOTE: the block below is unreachable after the raise
                # above; kept as in the original
                self.data = numpy.zeros((self.nbFiles,
                                         arrRet.shape[0],
                                         nImages),
                                        self.__dtype)
                self.incrProgressBar = 0
                for tempEdfFileName in filelist:
                    tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                    for i in range(nImages):
                        pieceOfStack = tempEdf.GetData(i)
                        self.data[self.incrProgressBar, :, i] = \
                            pieceOfStack[:]
                    self.incrProgressBar += 1
                    self.onProgress(self.incrProgressBar)
                self.onEnd()
            else:
                if nImages > 1:
                    # this is not the common case
                    # should I try to convert it to a standard one
                    # using a 3D matrix or keep it as a 4D matrix?
                    if self.nbFiles > 1:
                        if (arrRet.shape[0] > 1) and \
                           (arrRet.shape[1] > 1):
                            raise IOError(
                                "Multiple files with multiple images not implemented yet")
                        elif arrRet.shape[0] == 1:
                            self.data = numpy.zeros((self.nbFiles,
                                                     arrRet.shape[0] * nImages,
                                                     arrRet.shape[1]),
                                                    self.__dtype)
                            self.incrProgressBar = 0
                            for tempEdfFileName in filelist:
                                tempEdf = EdfFile.EdfFile(tempEdfFileName,
                                                          'rb')
                                for i in range(nImages):
                                    pieceOfStack = tempEdf.GetData(i)
                                    self.data[self.incrProgressBar, i, :] = \
                                        pieceOfStack[:, :]
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                        elif arrRet.shape[1] == 1:
                            self.data = numpy.zeros((self.nbFiles,
                                                     arrRet.shape[1] * nImages,
                                                     arrRet.shape[0]),
                                                    self.__dtype)
                            self.incrProgressBar = 0
                            for tempEdfFileName in filelist:
                                tempEdf = EdfFile.EdfFile(tempEdfFileName,
                                                          'rb')
                                for i in range(nImages):
                                    pieceOfStack = tempEdf.GetData(i)
                                    self.data[self.incrProgressBar, i, :] = \
                                        pieceOfStack[:, :]
                                self.incrProgressBar += 1
                                self.onProgress(self.incrProgressBar)
                    else:
                        self.data = numpy.zeros((nImages * self.nbFiles,
                                                 arrRet.shape[0],
                                                 arrRet.shape[1]),
                                                self.__dtype)
                        self.incrProgressBar = 0
                        for tempEdfFileName in filelist:
                            tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                            for i in range(nImages):
                                pieceOfStack = tempEdf.GetData(i)
                                self.data[nImages * self.incrProgressBar + i,
                                          :, :] = pieceOfStack[:, :]
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                    self.onEnd()
                else:
                    if fileindex == 1:
                        try:
                            self.data = numpy.zeros((arrRet.shape[0],
                                                     self.nbFiles,
                                                     arrRet.shape[1]),
                                                    self.__dtype)
                        except (MemoryError, ValueError):
                            try:
                                self.data = numpy.zeros((arrRet.shape[0],
                                                         self.nbFiles,
                                                         arrRet.shape[1]),
                                                        numpy.float32)
                            except (MemoryError, ValueError):
                                self.data = numpy.zeros((arrRet.shape[0],
                                                         self.nbFiles,
                                                         arrRet.shape[1]),
                                                        numpy.int16)
                    else:
                        try:
                            # estimate the needed memory in bytes
                            if self.__dtype == numpy.float64:
                                bytefactor = 8
                            else:
                                bytefactor = 4
                            needed_ = self.nbFiles * \
                                      arrRet.shape[0] * \
                                      arrRet.shape[1] * bytefactor
                            physicalMemory = \
                                PhysicalMemory.getPhysicalMemoryOrNone()
                            if physicalMemory is not None:
                                # spare 5% of memory
                                if physicalMemory < (1.05 * needed_):
                                    raise MemoryError(
                                        "Not enough physical memory available")
                            self.data = numpy.zeros((self.nbFiles,
                                                     arrRet.shape[0],
                                                     arrRet.shape[1]),
                                                    self.__dtype)
                        except (MemoryError, ValueError):
                            try:
                                # retry as float32 (4 bytes per item)
                                needed_ = self.nbFiles * \
                                          arrRet.shape[0] * \
                                          arrRet.shape[1] * 4
                                physicalMemory = \
                                    PhysicalMemory.getPhysicalMemoryOrNone()
                                if physicalMemory is not None:
                                    # spare 5% of memory
                                    if physicalMemory < (1.05 * needed_):
                                        raise MemoryError(
                                            "Not enough physical memory available")
                                self.data = numpy.zeros((self.nbFiles,
                                                         arrRet.shape[0],
                                                         arrRet.shape[1]),
                                                        numpy.float32)
                            except (MemoryError, ValueError):
                                text = "Memory Error: Attempt subsampling or convert to HDF5"
                                if HDF5 and (('PyMcaQt' in sys.modules) or
                                             ('PyMca.PyMcaQt' in sys.modules)):
                                    from PyMca5 import PyMcaQt as qt
                                    from PyMca5 import ArraySave
                                    msg = qt.QMessageBox.information(
                                            None,
                                            "Memory error\n",
                                            "Do you want to convert your data to HDF5?\n",
                                            qt.QMessageBox.Yes,
                                            qt.QMessageBox.No)
                                    if msg == qt.QMessageBox.No:
                                        raise MemoryError(text)
                                    hdf5file = qt.QFileDialog.getSaveFileName(
                                            None,
                                            "Please select output file name",
                                            os.path.dirname(filelist[0]),
                                            "HDF5 files *.h5")
                                    if not len(hdf5file):
                                        raise IOError("Invalid output file")
                                    hdf5file = qt.safe_str(hdf5file)
                                    if not hdf5file.endswith(".h5"):
                                        hdf5file += ".h5"
                                    hdf, self.data = \
                                        ArraySave.getHDF5FileInstanceAndBuffer(
                                            hdf5file,
                                            (self.nbFiles,
                                             arrRet.shape[0],
                                             arrRet.shape[1]))
                                else:
                                    raise MemoryError("Memory Error")
                    self.incrProgressBar = 0
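                    # Editor's note: the loops below fill self.data either
                    # with files along the middle axis (fileindex == 1) or
                    # along the first axis; in the latter case ESRF ID24
                    # "_sample_" maps are detected and normalized by an I0
                    # image interpolated between the "_I0start_" and
                    # "_I0end_" companion files.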
                    if fileindex == 1:
                        for tempEdfFileName in filelist:
                            tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                            pieceOfStack = tempEdf.GetData(0)
                            self.data[:, self.incrProgressBar, :] = \
                                pieceOfStack[:, :]
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                    else:
                        # test for ID24 map
                        ID24 = False
                        if "_sample_" in filelist[0]:
                            i0StartFile = filelist[0].replace("_sample_",
                                                              "_I0start_")
                            if os.path.exists(i0StartFile):
                                ID24 = True
                                id24idx = 0
                                i0Start = EdfFile.EdfFile(i0StartFile, 'rb')\
                                    .GetData(0).astype(numpy.float64)
                                i0EndFile = filelist[0].replace("_sample_",
                                                                "_I0end_")
                                i0Slope = 0.0
                                if os.path.exists(i0EndFile):
                                    i0End = EdfFile.EdfFile(i0EndFile,
                                                            'rb').GetData(0)
                                    i0Slope = (i0End - i0Start) / len(filelist)
                        for tempEdfFileName in filelist:
                            tempEdf = EdfFile.EdfFile(tempEdfFileName, 'rb')
                            if ID24:
                                pieceOfStack = -numpy.log(
                                    tempEdf.GetData(0) /
                                    (i0Start[0, :] + id24idx * i0Slope))
                                pieceOfStack[~numpy.isfinite(pieceOfStack)] = 1
                                id24idx += 1
                            else:
                                pieceOfStack = tempEdf.GetData(0)
                            try:
                                self.data[self.incrProgressBar, :, :] = \
                                    pieceOfStack[:, :]
                            except ValueError:
                                if pieceOfStack.shape[1] != arrRet.shape[1]:
                                    print(" ERROR on file %s" % tempEdfFileName)
                                    print(" DIM 1 error. Assuming missing data were at the end!!!")
                                if pieceOfStack.shape[0] != arrRet.shape[0]:
                                    print(" ERROR on file %s" % tempEdfFileName)
                                    print(" DIM 0 error. Assuming missing data were at the end!!!")
                                self.data[self.incrProgressBar,
                                          :pieceOfStack.shape[0],
                                          :pieceOfStack.shape[1]] = \
                                    pieceOfStack[:, :]
                            self.incrProgressBar += 1
                            self.onProgress(self.incrProgressBar)
                    self.onEnd()
        self.__nFiles = self.incrProgressBar
        self.__nImagesPerFile = nImages
        shape = self.data.shape
        for i in range(len(shape)):
            key = 'Dim_%d' % (i + 1,)
            self.info[key] = shape[i]
        if not isinstance(self.data, numpy.ndarray):
            # self.data is a disk-backed HDF5 buffer
            hdf.flush()
            self.info["SourceType"] = "HDF5Stack1D"
            if self.__imageStack:
                self.info["McaIndex"] = 0
                self.info["FileIndex"] = 1
            else:
                self.info["McaIndex"] = 2
                self.info["FileIndex"] = 0
            self.info["SourceName"] = [hdf5file]
            self.info["NumberOfFiles"] = 1
            self.info["Size"] = 1
        elif actualImageStack:
            self.info["SourceType"] = SOURCE_TYPE
            self.info["McaIndex"] = 0
            self.info["FileIndex"] = 1
            self.info["SourceName"] = self.sourceName
            self.info["NumberOfFiles"] = self.__nFiles * 1
            self.info["Size"] = self.__nFiles * self.__nImagesPerFile
        else:
            self.info["SourceType"] = SOURCE_TYPE
            self.info["FileIndex"] = fileindex
            self.info["SourceName"] = self.sourceName
            self.info["NumberOfFiles"] = self.__nFiles * 1
            self.info["Size"] = self.__nFiles * self.__nImagesPerFile
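    # Hedged usage sketch (editor's addition; the class name "EDFStack"
    # and file pattern are illustrative only):
    #
    #     import glob
    #     stack = EDFStack()
    #     stack.loadFileList(sorted(glob.glob("row_*.edf")), fileindex=0)
    #     stack.info["McaIndex"], stack.data.shape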