def get_precip_flag(cloudsat_filenames, cloudsat_dir=None, verbose=0):
    all_flags = []
    for cloudsat_path in cloudsat_filenames:
        # if precipitation information is stored in another file
        if cloudsat_dir is not None:
            basename = os.path.basename(cloudsat_path)
            filename = glob.glob(
                os.path.join(cloudsat_dir, basename[:11] + "*.hdf"))[0]
        else:
            filename = cloudsat_path

        f = HDF(filename, SDC.READ)
        vs = f.vstart()

        vdata_precip = vs.attach('Precip_flag')
        precip = vdata_precip[:]

        if verbose:
            print("hdf information", vs.vdatainfo())
            print('Nb pixels: ', len(precip))
            print('Precip_flag values: ', np.unique(precip))

        all_flags += precip

        # close everything
        vdata_precip.detach()
        vs.end()
        f.close()

    return np.array(all_flags).flatten().astype(np.int8)
def get_hdf_VD_file_variables(filename):
    """
    Get all the variables from an HDF VD file

    :param filename: The filename of the file to get the variables from
    :returns: An OrderedDict containing the variables from the file
    """
    variables = None
    if not HDF:
        raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

    try:
        # Open file
        datafile = HDF(filename)
        vs = datafile.vstart()
        # List of required variable names
        names = vs.vdatainfo()
        # This returns a list of tuples, so convert into a dictionary for easy lookup
        variables = {}
        for var in names:
            variables[var[0]] = var[1:]
        # Close file
        vs.end()
        datafile.close()
    except:
        logging.error("Error while reading VD data")

    return variables
def get_variable_names(self, filenames, data_type=None):
    try:
        from pyhdf.SD import SD
        from pyhdf.HDF import HDF
    except ImportError:
        raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

    valid_variables = set([])
    for filename in filenames:
        # Do VD variables
        datafile = HDF(filename)
        vdata = datafile.vstart()
        variables = vdata.vdatainfo()
        # Assumes that latitude shape == longitude shape (it should):
        # dim_length = [var[3] for var in variables if var[0] == 'Latitude'][0]
        for var in variables:
            # if var[3] == dim_length:
            valid_variables.add(var[0])

        # Do SD variables:
        sd = SD(filename)
        datasets = sd.datasets()
        # if 'Height' in datasets:
        #     valid_shape = datasets['Height'][1]
        for var in datasets:
            # if datasets[var][1] == valid_shape:
            valid_variables.add(var)

    return valid_variables
def get_variable_names(self, filenames, data_type=None):
    try:
        from pyhdf.SD import SD
        from pyhdf.HDF import HDF
    except ImportError:
        raise ImportError(
            "HDF support was not installed, please reinstall with pyhdf to read HDF files.")

    valid_variables = set([])
    for filename in filenames:
        # Do VD variables
        datafile = HDF(filename)
        vdata = datafile.vstart()
        variables = vdata.vdatainfo()
        # Assumes that latitude shape == longitude shape (it should):
        dim_length = [var[3] for var in variables if var[0] == 'Latitude'][0]
        for var in variables:
            if var[3] == dim_length:
                valid_variables.add(var[0])

        # Do SD variables:
        sd = SD(filename)
        datasets = sd.datasets()
        if 'Height' in datasets:
            valid_shape = datasets['Height'][1]
            for var in datasets:
                if datasets[var][1] == valid_shape:
                    valid_variables.add(var)

    return valid_variables
def get_vdata(self, VDataName):
    """Return VData (binary table) from hdf4.

    Parameters
    ----------
    VDataName : str
        Name of the VData (stored as binary table in hdf) field

    Returns
    -------
    dict
        returns VData dictionary
    """
    try:
        h4 = HDF(self.filename)
        vs_handle = h4.vstart()
        # in the following vs_handle.vdatainfo() should give information
        # about all vdata, but this does not function correctly with MO
        # installation.
        # print vs_handle.vdatainfo()
        vd = vs_handle.attach(VDataName)
        vdi = vd.fieldinfo()
        vd.detach()
        vdata = {}
        for i in vdi:
            vd = vs_handle.attach(VDataName)
            vd.setfields(i[0])
            vdata[i[0]] = vd.read()
            vd.detach()
        vs_handle.end()
        h4.close()
    except HDF4Error as e:
        raise HDF4Error(e)
    return vdata
def get_coordinates(cloudsat_filenames, verbose=0):
    all_latitudes, all_longitudes = [], []
    for cloudsat_path in cloudsat_filenames:
        f = HDF(cloudsat_path, SDC.READ)
        vs = f.vstart()

        vdata_lat = vs.attach('Latitude')
        vdata_long = vs.attach('Longitude')

        latitudes = vdata_lat[:]
        longitudes = vdata_long[:]
        assert len(latitudes) == len(longitudes), "cloudsat hdf corrupted"

        if verbose:
            print("hdf information", vs.vdatainfo())
            print('Nb pixels: ', len(latitudes))
            print('Lat min, Lat max: ', min(latitudes), max(latitudes))
            print('Long min, Long max: ', min(longitudes), max(longitudes))

        all_latitudes += latitudes
        all_longitudes += longitudes

        # close everything
        vdata_lat.detach()
        vdata_long.detach()
        vs.end()
        f.close()

    return np.array(all_latitudes).flatten(), np.array(all_longitudes).flatten()
class HDF4File:
    """
    Simplified interface for reading HDF4 files. It combines the SD and VS
    low-level interfaces.
    """
    # Attributes:
    #     variables(``list``): List of strings of variable names contained in
    #         this file.

    def __init__(self, path):
        self.path = path
        self.file_handle = HDF(str(path))
        self.scientific_dataset = SD(str(path))

        datasets = self.scientific_dataset.datasets()
        dataset_dict = {
            key: Dataset(weakref.ref(self), key, *info)
            for key, info in datasets.items()
        }
        self.datasets = dataset_dict

        self.vdata_table = VS(self.file_handle)
        vdata_dict = {
            info[0]: VData(weakref.ref(self), *info)
            for info in self.vdata_table.vdatainfo()
        }
        self.vdata = vdata_dict

    def __del__(self):
        if self.file_handle:
            self.file_handle.close()
            self.file_handle = None

    @property
    def variables(self):
        """
        Names of the variables available in this file.
        """
        return list(self.datasets.keys()) + list(self.vdata.keys())

    def __getattribute__(self, name):
        try:
            return object.__getattribute__(self, name)
        except AttributeError as error:
            datasets = object.__getattribute__(self, "datasets")
            if name in datasets:
                return datasets[name]
            vdata = object.__getattribute__(self, "vdata")
            if name in vdata:
                return vdata[name]
            raise error

    def __repr__(self):
        return f"HDF4File({self.path})"
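# A minimal usage sketch for the wrapper above. The granule name is a
# hypothetical placeholder, and the Dataset/VData helpers referenced in
# __init__ are assumed to be defined alongside the class.
f = HDF4File("2008183012329_11573_CS_2B-GEOPROF_GRANULE.hdf")
print(f.variables)   # combined SDS and VData names
lat = f.Latitude     # unknown attributes resolve via __getattribute__ to datasets/vdata entries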
def get_data(vds, first_record=False, missing_values=None):
    """
    Actually read the data from the VDS handle. We shouldn't need to check for HDF being installed here because the
    VDS object which is being passed to us can only have come from pyhdf.
    :param vds:
    :param first_record:
    :param missing_values:
    :return:
    """
    import numpy as np
    from pyhdf.HDF import HDF, HDF4Error
    from cis.utils import create_masked_array_for_missing_values

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    try:
        datafile = HDF(filename)
    except HDF4Error as e:
        raise IOError(e)

    vs = datafile.vstart()

    if first_record:
        # FIXME - This is the only bit that is actually different to the baseline
        vd = vs.attach('metadata')
        vd.setfields(variable)
        data = vd.read()
    else:
        # get data for that variable
        vd = vs.attach(variable)
        data = vd.read(nRec=vd.inquire()[0])

    # create numpy array from data
    data = np.array(data).flatten()

    # dealing with missing data
    if missing_values is None:
        v = _get_attribute_value(vd, 'missing')
        v = float(v) if v is not None else None
        missing_values = [v]
    data = create_masked_array_for_missing_values(data, missing_values)

    # detach and close
    vd.detach()
    vs.end()
    datafile.close()

    return data
class Hdf4File(DataProductBase):
    """
    Base class for file products using HDF4File format.

    The :class:`Hdf4File` wraps around the pyhdf.SD class to implement
    RAII.
    """

    def __init__(self, filename):
        """
        Open an HDF4 file for reading.

        Arguments:
            filename(str): The path to the file to open.
        """
        super().__init__()
        from pyhdf.HDF import HDF, HC
        from pyhdf.SD import SD, SDC
        import pyhdf.VS

        self.filename = filename
        self.hdf = HDF(self.filename, HC.READ)
        self.vs = self.hdf.vstart()
        self.sd = SD(self.filename, SDC.READ)

    @property
    def vs_attributes(self):
        vs_attributes = [t[0] for t in self.vs.vdatainfo()]
        return vs_attributes

    @property
    def sd_attributes(self):
        sd_attributes = [t for t in self.sd.datasets()]
        return sd_attributes

    @property
    def attributes(self):
        return self.vs_attributes + self.sd_attributes

    def __getitem__(self, name):
        if name in self.vs_attributes:
            return self.vs.attach(name)
        elif name in self.sd_attributes:
            return self.sd.select(name)
        else:
            # format the requested name into the error message
            raise ValueError("{} is not a known attribute of this file.".format(name))

    def __del__(self):
        self.sd.end()
        self.vs.end()
        self.hdf.close()
def get_data(vds, first_record=False, missing_values=None):
    """
    Actually read the data from the VDS handle. We shouldn't need to check for HDF being installed here because the
    VDS object which is being passed to us can only have come from pyhdf.
    :param vds:
    :param first_record:
    :param missing_values:
    :return:
    """
    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    try:
        datafile = HDF(filename)
    except HDF4Error as e:
        raise IOError(e)

    vs = datafile.vstart()

    if first_record:
        vd = vs.attach(vs.next(-1))
        vd.setfields(variable)
        data = vd.read()
    else:
        # get data for that variable
        vd = vs.attach(variable)
        data = vd.read(nRec=vd.inquire()[0])

    # create numpy array from data
    data = np.array(data).flatten()

    # dealing with missing data
    if missing_values is None:
        missing_values = [_get_attribute_value(vd, 'missing')]
    data = create_masked_array_for_missing_values(data, missing_values)

    # detach and close
    vd.detach()
    vs.end()
    datafile.close()

    return data
def open(self):
    """Open for reading."""
    if self.hdf is None:
        self.hdf = HDF(self.file)
        self.vs = self.hdf.vstart()

        # Ignore exceptions telling us there are no VData's
        try:
            pass
            #self.vdinfo = self.vs.vdatainfo()
        except HDF4Error:
            pass

        # Ignore exceptions telling us there are no SDS's
        try:
            self.sd = SD(self.file)
        except HDF4Error:
            pass
def open(self, view=None, datamodel=None, datamodel_geolocation_dims=None):
    """Open the HDF file

    Args:
        view (dict, optional): a dictionary where keys are dimension names
            and values are slices. A view can be set on a file, meaning that
            only the subset defined by this view will be accessible. This
            view is expressed as any subset (see :func:`get_values`). For
            example::

                view = {'time':slice(0,0), 'lat':slice(200,300),
                        'lon':slice(200,300)}

        datamodel (str): type of feature read or written. Internal argument
            only used by the classes from :mod:`~cerbere.datamodel` package.
            Can be 'Grid', 'Swath', etc...

        datamodel_geolocation_dims (list, optional): list of the name of the
            geolocation dimensions defining the data model to be read in the
            file. Optional argument, only used by the datamodel classes, in
            case the mapper class can store different types of data models.

    Returns:
        an handler on the opened file
    """
    self.view = view
    if self.is_writable():
        raise NotImplementedError
    else:
        if not os.path.exists(self._url):
            raise Exception("File %s is not existing" % self._url)
        if (self._url is not None) and (self._mode is not None):
            logging.debug("MODE : %s", self._mode)
            self._handler = SD(self._url, MODES[self._mode])
            # case of vgroup containing some information
            if self._mode == 'r':
                # open HDF file
                self._hdffile = HDF(self._url, HC.READ)
                # initialize V interface on HDF file
                self._vdata = self._hdffile.vstart()
            return self._handler
        else:
            return None
def get_metadata(vds):
    from cis.data_io.ungridded_data import Metadata

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    datafile = HDF(filename)
    vs = datafile.vstart()

    # get data for that variable
    vd = vs.attach(variable)

    name = variable
    misc = vd.attrinfo()

    long_name = _pop_attribute_value(misc, 'long_name', '')
    units = _pop_attribute_value(misc, 'units', '')
    factor = _pop_attribute_value(misc, 'factor')
    offset = _pop_attribute_value(misc, 'offset')
    missing = _pop_attribute_value(misc, 'missing')

    # VD data are always 1D, so the shape is simply the length of the data vector
    shape = [len(vd.read(nRec=vd.inquire()[0]))]

    # Tidy up the rest of the data in misc:
    misc = {k: v[2] for k, v in misc.items()}

    metadata = Metadata(name=name, long_name=long_name, shape=shape, units=units,
                        factor=factor, offset=offset, missing_value=missing, misc=misc)

    # detach and close
    vd.detach()
    vs.end()
    datafile.close()

    return metadata
def _get_cloudsat_vds_data(self, vds):
    from cis.data_io.hdf_vd import _get_attribute_value, HDF, HDF4Error
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    try:
        datafile = HDF(filename)
    except HDF4Error as e:
        raise IOError(e)

    vs = datafile.vstart()
    vd = vs.attach(variable)
    data = vd.read(nRec=vd.inquire()[0])

    # create numpy array from data
    data = np.array(data).flatten()

    missing_value = _get_attribute_value(vd, 'missing', None)
    if missing_value is not None:
        data = create_masked_array_for_missing_data(data, missing_value)

    valid_range = _get_attribute_value(vd, "valid_range")
    if valid_range is not None:
        # Assume it's the right data type already
        data = np.ma.masked_outside(data, *valid_range)

    # TODO This probably won't work....
    factor = _get_attribute_value(vd, "factor", 1)
    offset = _get_attribute_value(vd, "offset", 0)
    data = self._apply_scaling_factor_CLOUDSAT(data, factor, offset)

    # detach and close
    vd.detach()
    vs.end()
    datafile.close()

    return data
def read(filename, variables=None, datadict=None):
    """
    Given a filename and a list of file names return a dictionary of VD data handles

    :param filename: full path to a single HDF4 file
    :param variables: A list of variables to read, if no variables are given, no variables are read
    :param datadict: A dictionary of variable name, data handle pairs to be appended to
    :return: An updated datadict with any new variables appended.
    """
    if not HDF:
        raise ImportError(
            "HDF support was not installed, please reinstall with pyhdf to read HDF files.")

    if datadict is None:
        datadict = {}

    variables = listify(variables)

    vs = None
    datafile = None
    try:
        datafile = HDF(filename)
        vs = datafile.vstart()

        for variable in variables:
            try:
                vd = vs.attach(variable)
                vd.detach()
                datadict[variable] = VDS(filename, variable)
            except:
                # ignore variable that failed
                pass
    finally:
        if vs is not None:
            vs.end()
        if datafile is not None:
            datafile.close()

    return datadict
def parse_ace_data(hdf4_fname, N=1000):
    """
    Load ACE data *hdf4_fname* and return a pandas :class:`DataFrame`
    with the information. Process *N* lines of the HDF file at a time.
    """
    key = key_from_fname(hdf4_fname)
    hdf = HDF(hdf4_fname)
    try:
        vs = hdf.vstart()
        vdata = vs.attach(key)
        fieldinfo = vdata.fieldinfo()
        loop_divmod = divmod(vdata.inquire()[0], N)
        fields = [x[0] for x in fieldinfo]
        data_map = defaultdict(list)
        for i in range(loop_divmod[0] + 1):
            try:
                data = vdata.read(N if i < loop_divmod[0] else loop_divmod[1])
            except HDF4Error:
                break
            for data_i in data:
                for data_ii, field in zip(data_i, fields):
                    data_map[field].append(data_ii)
    finally:
        vdata.detach()
        vs.vend()
        hdf.close()
    # convert to DataFrame
    remove_set = set(
        ['year', 'fp_year', 'day', 'fp_doy', 'hr', 'min', 'sec', 'ACEepoch'])
    dt = []
    for year, day, hr, minute, sec in zip(
            *[data_map[x] for x in ['year', 'day', 'hr', 'min', 'sec']]):
        dt.append(
            datetime(year, 1, 1) +
            timedelta(days=day - 1, hours=hr, minutes=minute, seconds=sec))
    data = {k: v for k, v in data_map.iteritems() if k not in remove_set}
    df = PD.DataFrame(index=dt, data=data)
    return df
def dump_cloudsat(filename):
    """
    walk the hdf file and print out
    information about each vgroup and vdata object

    Parameters
    ----------

    filename: str or Path object
        name of hdf file

    Returns
    -------

    prints information to stdout
    """
    # filename=str(filename)
    hdf = HDF(filename)
    # Initialize the SD, V and VS interfaces on the file.
    sd = SD(filename)
    vs = hdf.vstart()
    v = hdf.vgstart()
    # Scan all vgroups in the file.
    ref = -1
    while 1:
        try:
            ref = v.getid(ref)
            print('vgroup: ', ref)
        except HDF4Error as msg:  # no more vgroup
            break
        describevg(ref, v, vs, sd)
    return None
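# dump_cloudsat above calls a describevg helper that is not shown here. A
# minimal sketch of one, loosely following the vgroup-walking example in the
# pyhdf documentation, might look like this (the exact printout format is an
# assumption):
def describevg(ref, v, vs, sd):
    # Attach the vgroup and print its name, class and members.
    vg = v.attach(ref)
    print("  name:", vg._name, "class:", vg._class)
    for tag, ref2 in vg.tagrefs():
        if tag == HC.DFTAG_NDG:
            # member is an SDS dataset
            sds = sd.select(sd.reftoindex(ref2))
            name, rank, dims, dtype, nattrs = sds.info()
            print("    SDS:", name, "dims:", dims)
            sds.endaccess()
        elif tag == HC.DFTAG_VH:
            # member is a vdata table
            vd = vs.attach(ref2)
            nrecs, intmode, fields, size, vdname = vd.inquire()
            print("    VData:", vdname, "records:", nrecs)
            vd.detach()
    vg.detach()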
def get_metadata(vds):
    from cis.data_io.ungridded_data import Metadata

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    datafile = HDF(filename)
    vs = datafile.vstart()

    # get data for that variable
    vd = vs.attach(variable)

    name = variable
    long_name = __get_attribute_value(vd, 'long_name')
    # VD data are always 1D, so the shape is simply the length of the data vector
    shape = [len(vd.read(nRec=vd.inquire()[0]))]
    units = __get_attribute_value(vd, 'units')
    valid_range = __get_attribute_value(vd, 'valid_range')
    factor = __get_attribute_value(vd, 'factor')
    offset = __get_attribute_value(vd, 'offset')
    missing = __get_attribute_value(vd, 'missing')

    # put the whole dictionary of attributes into 'misc'
    # so that other metadata of interest can still be retrieved if need be
    misc = vd.attrinfo()

    metadata = Metadata(name=name, long_name=long_name, shape=shape, units=units,
                        range=valid_range, factor=factor, offset=offset,
                        missing_value=missing, misc=misc)

    # detach and close
    vd.detach()
    vs.end()
    datafile.close()

    return metadata
def read_amsr_hdf4(filename):
    from pyhdf.SD import SD, SDC
    from pyhdf.HDF import HDF  # HC
    import pyhdf.VS

    retv = AmsrObject()

    h4file = SD(filename, SDC.READ)
    # datasets = h4file.datasets()
    # attributes = h4file.attributes()
    # for idx, attr in enumerate(attributes.keys()):
    #     print idx, attr
    for sds in ["Longitude", "Latitude", "High_res_cloud"]:
        data = h4file.select(sds).get()
        if sds in ["Longitude", "Latitude"]:
            retv.all_arrays[sds.lower()] = data.ravel()
        elif sds in ["High_res_cloud"]:
            lwp_gain = h4file.select(sds).attributes()['Scale']
            retv.all_arrays["lwp_mm"] = data.ravel() * lwp_gain
        # print h4file.select(sds).info()

    h4file = HDF(filename, SDC.READ)
    vs = h4file.vstart()
    data_info_list = vs.vdatainfo()
    # print "1D data compound/Vdata"
    for item in data_info_list:
        # 1D data compound/Vdata
        name = item[0]
        # print name
        if name in ["Time"]:
            data_handle = vs.attach(name)
            data = np.array(data_handle[:])
            retv.all_arrays["sec1993"] = data
            data_handle.detach()
        else:
            pass
            # print name
            # data = np.array(data_handle[:])
            # attrinfo_dic = data_handle.attrinfo()
            # factor = data_handle.findattr('factor')
            # offset = data_handle.findattr('offset')
            # print data_handle.factor
            # data_handle.detach()
    # print data_handle.attrinfo()
    h4file.close()
    # for key in retv.all_arrays.keys():
    #     print key, retv.all_arrays[key]
    return retv
class HDFFile(AbstractMapper):
    '''
    Generic storage class for HDF files
    '''
    def __init__(self, url=None, mode=READ_ONLY, **kwargs):
        """
        """
        AbstractMapper.__init__(self, url=url, mode=mode, **kwargs)
        return

    def open(self, view=None, datamodel=None, datamodel_geolocation_dims=None):
        """Open the HDF file

        Args:
            view (dict, optional): a dictionary where keys are dimension names
                and values are slices. A view can be set on a file, meaning
                that only the subset defined by this view will be accessible.
                This view is expressed as any subset (see :func:`get_values`).
                For example::

                    view = {'time':slice(0,0), 'lat':slice(200,300),
                            'lon':slice(200,300)}

            datamodel (str): type of feature read or written. Internal
                argument only used by the classes from
                :mod:`~cerbere.datamodel` package. Can be 'Grid', 'Swath',
                etc...

            datamodel_geolocation_dims (list, optional): list of the name of
                the geolocation dimensions defining the data model to be read
                in the file. Optional argument, only used by the datamodel
                classes, in case the mapper class can store different types
                of data models.

        Returns:
            an handler on the opened file
        """
        self.view = view
        if self.is_writable():
            raise NotImplementedError
        else:
            if not os.path.exists(self._url):
                raise Exception("File %s is not existing" % self._url)
            if (self._url is not None) and (self._mode is not None):
                logging.debug("MODE : %s", self._mode)
                self._handler = SD(self._url, MODES[self._mode])
                # case of vgroup containing some information
                if self._mode == 'r':
                    # open HDF file
                    self._hdffile = HDF(self._url, HC.READ)
                    # initialize V interface on HDF file
                    self._vdata = self._hdffile.vstart()
                return self._handler
            else:
                return None

    def close(self):
        self._vdata.end()  # terminate V interface
        self._hdffile.close()
        self._handler = None
        self._vdata = None
        self._hdffile = None
        return

    def get_fieldnames(self):
        '''
        Returns the list of geophysical fields stored for the feature
        '''
        fields = self.get_handler().datasets().keys()
        # remove here time/space information to keep only geophysical fields
        for field in ['time', 'lat', 'lon']:
            if field in fields:
                fields.remove(self.get_geolocation_field(field))
        return fields

    def read_field_attributes(self, fieldname):
        """
        return the specific storage attributes of a variable
        (_FillValue, scale_factor, add_offset)
        """
        native_fieldname = self.get_geolocation_field(fieldname)
        if native_fieldname is None:
            native_fieldname = fieldname
        attrs = self.get_handler().select(native_fieldname).attributes()
        return attrs

    def get_dimsize(self, dimname):
        hdfdim = self.get_matching_dimname(dimname)
        if hdfdim is None:
            hdfdim = dimname
        for fieldname in self.get_handler().datasets():
            dims = self.get_handler().select(fieldname).dimensions()
            for dim in dims:
                if dim == hdfdim:
                    return dims[dim]
        return None

    def get_dimensions(self, fieldname=None):
        """
        Return the standard dimension names of a file or a field in the file

        :keyword fieldname: the field from which to get the dimension names.
            For a geolocation field, use the cerbere standard name
            (time, lat, lon), though native field name will work too.
        :type fieldname: str

        :return: the standard dimensions of the field or file.
        :rtype: tuple of strings
        """
        if fieldname is None:
            raise NotImplementedError
        else:
            native_fieldname = self.get_geolocation_field(fieldname)
            if native_fieldname is None:
                native_fieldname = fieldname
            var = self.get_handler().select(native_fieldname)
            if var is None:
                raise Exception("Variable %s not existing in file"
                                % native_fieldname)
            dims = OrderedDict(
                sorted(var.dimensions(full=True).items(),
                       key=lambda t: t[1][1])
                )
            dims = [self.get_standard_dimname(dim) for dim in dims]
            return tuple(dims)

    def read_field(self, fieldname):
        namingauth = None
        native_fieldname = self.get_geolocation_field(fieldname)
        if native_fieldname is None:
            native_fieldname = fieldname
        varattrs = copy.copy(self.read_field_attributes(fieldname))
        if 'long_name' in varattrs:
            descr = varattrs['long_name']
        else:
            descr = None
        variable = Variable(
            shortname=fieldname,
            description=descr,
            authority=namingauth,
            standardname=None
            )
        dims = self.get_full_dimensions(fieldname)
        TYPE_CONVERT = {'4': numpy.dtype(numpy.int8),
                        '5': numpy.dtype(numpy.float32),
                        '20': numpy.dtype(numpy.int8),
                        '21': numpy.dtype(numpy.uint8),
                        '22': numpy.dtype(numpy.int16),
                        '23': numpy.dtype(numpy.uint16),
                        '24': numpy.dtype(numpy.int32)
                        }
        typestr = self.get_handler().select(native_fieldname).info()[3]
        rec = Field(
            variable,
            dims,
            datatype=TYPE_CONVERT[str(typestr)]
            )
        rec.attach_storage(self.get_field_handler(fieldname))
        # MetaData
        rec.units = None
        if 'units' in varattrs:
            rec.units = varattrs['units']
        rec.valid_min = None
        rec.valid_max = None
        rec.attributes = {}
        if ('valid_min' in varattrs and 'valid_max' in varattrs)\
                or 'valid_range' in varattrs:
            if 'valid_range' in varattrs:
                rec.valid_min, rec.valid_max = varattrs['valid_range']
            else:
                rec.valid_min = varattrs['valid_min']
                rec.valid_max = varattrs['valid_max']
            if 'scale_factor' in varattrs:
                rec.valid_min = rec.valid_min * varattrs['scale_factor']
                rec.valid_max = rec.valid_max * varattrs['scale_factor']
            if 'add_offset' in varattrs:
                rec.valid_min = rec.valid_min + varattrs['add_offset']
                rec.valid_max = rec.valid_max + varattrs['add_offset']
        for att in varattrs:
            if not att in ['units', 'scale_factor', 'add_offset',
                           '_FillValue', 'valid_min', 'valid_max',
                           'scale_factor_err', 'add_offset_err',
                           'valid_range', 'calibrated_nt', 'SDS_type',
                           'long_name', 'bad_value_scaled',
                           'bad_value_unscaled']:
                rec.attributes[att] = varattrs[att]
        return rec

    def read_values(self, fieldname, slices=None):
        native_fieldname = self.get_geolocation_field(fieldname)
        if native_fieldname is None:
            native_fieldname = fieldname
        var = self.get_handler().select(native_fieldname)
        if slices is None:
            values = var.get()
        else:
            dims = self.get_full_dimensions(fieldname).keys()
            newslices = []
            # fill in slices with None values
            for ind, slc in enumerate(slices):
                i0, i1, step = slc.start, slc.stop, slc.step
                if i0 is None:
                    i0 = 0
                if i1 is None:
                    i1 = self.get_dimsize(dims[ind])
                if step is None:
                    step = 1
                newslices.append(slice(i0, i1, step))
            # Added conversion to int as get does not support long values.
            slstart = [int(s.start) for s in newslices]
            slstop = [int(s.stop - s.start) for s in newslices]
            slstride = [int(s.step) for s in newslices]
            values = var.get(start=tuple(slstart),
                             count=tuple(slstop),
                             stride=tuple(slstride))
        attrs = self.read_field_attributes(fieldname)
        if '_FillValue' in attrs:
            fill_value = attrs['_FillValue']
        else:
            fill_value = None
        if not fill_value is None:
            values = numpy.ma.array(values, fill_value=fill_value)
        else:
            values = numpy.ma.array(values)
        if 'scale_factor' in attrs:
            values = values * attrs['scale_factor']
        if 'add_offset' in attrs:
            values = values + attrs['add_offset']
        return values

    def read_global_attributes(self):
        return self.get_handler().attributes()

    def read_global_attribute(self, attr):
        """
        """
        return self.read_global_attributes()[attr]

    def write_field(self, fieldname):
        """
        """
        raise NotImplementedError

    def read_fillvalue(self, fieldname):
        """
        """
        raise NotImplementedError

    def create_field(self, field, dim_translation=None):
        """
        """
        raise NotImplementedError

    def create_dim(self, dimname, size=None):
        """
        """
        raise NotImplementedError

    def write_global_attributes(self, attrs):
        """
        write the storage (file) global attributes
        """
        raise NotImplementedError

    def get_start_time(self):
        """Returns the minimum date of the file temporal coverage"""
        raise NotImplementedError

    def get_end_time(self):
        """
        """
        raise NotImplementedError

    def get_bbox(self):
        '''
        returns the bounding box of the feature, as a tuple
        (lonmin, latmin, lonmax, latmax)
        '''
        return None

    def get_spatial_resolution_in_deg(self):
        """Returns the average spatial resolution in degrees"""
        return None
class HdfFile(object):
    """Class implementing HDF file access."""

    GEOLOC_FIELDS = ()

    def __init__(self, file):
        """Constructor."""
        self.file = file
        self.hdf = None
        self.vs = None
        self.vdinfo = None
        self.sd = None
        self.savedVarsDict = None
        self.open()
        # Permit data files without VData's
        if type(self.vdinfo) != type(None):
            self.vdList = [i[0] for i in self.vdinfo]
            #print self.vdList
        # Permit data files without SDS's
        if type(self.sd) != type(None):
            self.datasetList = self.sd.datasets().keys()
            #print self.datasetList
        self.levels = {}
        self.geoDict = self._getGeoDict()
        self.dataDict = self._getDataDict()
        self.close()

    # Always define in subclass
    def _getGeoDict(self):
        raise NotImplementedError("Not implemented.")

    # Always define in subclass, except for cloudsat
    def _getDataDict(self):
        return None

    def open(self):
        """Open for reading."""
        if self.hdf is None:
            self.hdf = HDF(self.file)
            self.vs = self.hdf.vstart()

            # Ignore exceptions telling us there are no VData's
            try:
                self.vdinfo = self.vs.vdatainfo()
            except HDF4Error:
                pass

            # Ignore exceptions telling us there are no SDS's
            try:
                self.sd = SD(self.file)
            except HDF4Error:
                pass

    def close(self):
        """Close hdf file."""
        if hasattr(self, 'hdf') and self.hdf is not None:
            self.vs.end()
            self.hdf.close()
            self.sd.end()
            self.hdf = None
            self.vs = None
            self.vdinfo = None
            self.sd = None

    def getGeo(self):
        return self.geoDict

    def get(self, var):
        """Return variable array dict."""
        self.open()

        #get list of vars
        if isinstance(var, types.StringTypes):
            var = [var]
        elif isinstance(var, (types.ListType, types.TupleType)):
            pass
        elif var is None:
            # If we don't have any SDS's, go with the vdata's only
            # if we don't have any vdata's go with SDS's only
            try:
                var = self.vdList
                try:
                    var.extend(self.datasetList)
                except AttributeError:
                    pass
            except AttributeError:
                var = self.datasetList
        else:
            raise RuntimeError("Incorrect argument type for %s." % var)

        #create dict of (attrs, array) for each var
        a = {}
        for v in var:
            if v == '':
                continue  # added by bytang
            #handle SD types
            ds = None
            if v in self.datasetList:
                try:
                    ds = self.sd.select(v)
                    a[v] = (ds.attributes(), N.array(ds.get()))
                    continue
                except HDF4Error, e:
                    pass
                finally:
                    if ds is not None:
                        ds.endaccess()
class HDF4(_geospatial):
    """
    HDF4 context manager class.
    """
    hdf = None
    vs = None
    v = None

    def __init__(self, fname):
        """
        :param str fname: The path of the HDF4 file.
        """
        self.fname = str(fname)

    def __enter__(self):
        """
        Open HDF file and interfaces for use as context manager.

        :returns: Self.
        """
        self.hdf = HDF(self.fname)
        self.vs = self.hdf.vstart()
        self.v = self.hdf.vgstart()

        return self

    def __exit__(self, *args):
        """
        Close interfaces and HDF file after finishing use in context manager.
        """
        self.v.end()
        self.vs.end()
        self.hdf.close()

    def _get_coords(self, vs, fn):
        """
        Iterate through vgroup and return a list of coordinates (if existing).

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing geospatial information.
        """
        mappings = {
            "NVlat2": "lat",
            "NVlng2": "lon",
        }

        coords = {}
        for k, v in mappings.iteritems():
            ref = vs.find(k)
            vd = vs.attach(ref)

            coords[v] = []
            while True:
                try:
                    coord = float(vd.read()[0][0])
                    coord /= 10**7
                    coords[v].append(coord)
                except HDF4Error:  # End of file
                    break

            vd.detach()

        return coords

    def _get_temporal(self, vs, fn):
        """
        Return start and end timestamps (if existing)

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing temporal information.
        """
        mappings = {
            "MIdate": "date",
            "MIstime": "start_time",
            "MIetime": "end_time",
        }

        timestamps = {}
        for k, v in mappings.iteritems():
            ref = vs.find(k)
            vd = vs.attach(ref)

            timestamps[v] = []
            while True:
                try:
                    timestamps[v].append(vd.read()[0][0])
                except HDF4Error:  # EOF
                    break

            vd.detach()

        # This list comprehension basically converts from a list of integers
        # into a list of chars and joins them together to make strings
        # ...
        # If unclear - HDF text data comes out as a list of integers, e.g.:
        # 72 101 108 108 111 32 119 111 114 108 100 (this means "Hello world")
        # Those "char" numbers get converted to strings with this snippet.
        dates = [chr(x) for x in timestamps["date"] if x != 0]
        timestamps["date"] = ''.join(dates)

        return self._parse_timestamps(timestamps)

    def _parse_timestamps(self, tm_dict):
        """
        Parse start and end timestamps from an HDF4 file.

        :param dict tm_dict: The timestamp to be parsed
        :returns: Dict containing start and end timestamps
        """
        st_base = ("%s %s" % (tm_dict["date"], tm_dict["start_time"][0]))
        et_base = ("%s %s" % (tm_dict["date"], tm_dict["end_time"][0]))

        for t_format in ["%d/%m/%y %H%M%S", "%d/%m/%Y %H%M%S"]:
            try:
                start_time = datetime.datetime.strptime(st_base, t_format)
                end_time = datetime.datetime.strptime(et_base, t_format)
            except ValueError:
                # ValueError will be raised if strptime format doesn't match
                # the actual timestamp - so just try the next strptime format
                continue

        return {
            "start_time": start_time.isoformat(),
            "end_time": end_time.isoformat()
        }

    def get_geospatial(self):
        """
        Search through HDF4 file, returning a list of coordinates from the
        'Navigation' vgroup (if it exists).

        :returns: Dict containing geospatial information.
        """
        ref = -1

        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)

                if vg._name == "Navigation":
                    geospatial = self._get_coords(self.vs, self.fname)
                    geospatial["type"] = "track"  # Type annotation
                    vg.detach()
                    return geospatial

                vg.detach()
            except HDF4Error:  # End of file
                # This is a weird way of handling files, but this is what the
                # pyhdf library demonstrates...
                break

        return None

    def get_temporal(self):
        """
        Search through HDF4 file, returning timestamps from the 'Mission'
        vgroup (if it exists)

        :returns: List containing temporal metadata
        """
        ref = -1

        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)

                if vg._name == "Mission":
                    temporal = self._get_temporal(self.vs, self.fname)
                    vg.detach()
                    return temporal

                vg.detach()
            except HDF4Error:  # End of file
                # This 'except at end of file' thing is some pyhdf weirdness
                # Check the pyhdf documentation for clarification
                break

        return None

    def get_properties(self):
        """
        Returns ceda_di.metadata.properties.Properties object containing
        geospatial and temporal metadata from file.

        :returns: Metadata.product.Properties object
        """
        geospatial = self.get_geospatial()
        temporal = self.get_temporal()
        filesystem = super(HDF4, self).get_filesystem(self.fname)
        data_format = {
            "format": "HDF4",
        }

        instrument = arsf.Hyperspectral.get_instrument(filesystem["filename"])
        flight_info = arsf.Hyperspectral.get_flight_info(filesystem["filename"])
        props = product.Properties(spatial=geospatial,
                                   temporal=temporal,
                                   filesystem=filesystem,
                                   data_format=data_format,
                                   instrument=instrument,
                                   flight_info=flight_info)

        return props
def HDFread(filename, variable, vgroup=None):
    """
    Extract the data for non-scientific data in V mode of hdf file
    """
    if vgroup is None:
        vgroup = 'Geolocation Fields'
    filename = str(filename)
    hdf = HDF(filename, HC.READ)

    # Initialize the SD, V and VS interfaces on the file.
    sd = SD(filename)
    vs = hdf.vstart()
    v = hdf.vgstart()
    vg_dict = {}
    ref = -1
    while 1:
        try:
            ref = v.getid(ref)
            #print('vgroup ref number: ',ref)
        except HDF4Error as msg:  # no more vgroup
            break
        vg = v.attach(ref)
        # print("----------------")
        # print("vg name is:", vg._name, "class:",vg._class, "tag,ref:", end=' ')
        # print(vg._tag, vg._refnum)
        vg_dict[vg._name] = (vg._tag, vg._refnum)
        vg.detach()
    tag, ref = vg_dict[vgroup]

    # Open all data of the class
    vg = v.attach(ref)
    # print("----------------")
    # print("vg name is:", vg._name, "class:",vg._class, "tag,ref:", end=' ')
    # print(vg._tag, vg._refnum)

    # All fields in the class
    members = vg.tagrefs()

    nrecs = []
    names = []
    for tag, ref in members:
        # Vdata tag
        if tag == HC.DFTAG_VH:
            vd = vs.attach(ref)
            nrec, intmode, fields, size, name = vd.inquire()
            nrecs.append(nrec)
            names.append(name)
            vd.detach()
    try:
        idx = names.index(variable)
    except ValueError:
        error = f'{variable} is not in {names} for vgroup {vgroup}'
        raise ValueError(error)
    var = vs.attach(members[idx][1])
    V = var.read(nrecs[idx])
    var.detach()
    # Terminate V, VS and SD interfaces.
    v.end()
    vs.end()
    sd.end()
    # Close HDF file.
    hdf.close()
    return np.asarray(V)
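# A short usage sketch for HDFread, assuming a hypothetical CloudSat granule
# whose 'Geolocation Fields' vgroup contains the usual Latitude and
# Profile_time vdatas.
granule = "2008183012329_11573_CS_2B-GEOPROF_GRANULE.hdf"  # hypothetical file name
lats = HDFread(granule, "Latitude")   # vgroup defaults to 'Geolocation Fields'
times = HDFread(granule, "Profile_time", vgroup="Geolocation Fields")
print(lats.shape, times.shape)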
def open_dataset_hdf(filename, variables=None, drop_variables=[]):
    da_dict = {}

    # First read SD (scientific datasets)
    sd = SD(filename)
    if variables is None:
        data_vars = sd.datasets().keys()
    else:
        data_vars = variables
    for dname in data_vars:
        if dname in drop_variables:
            continue
        if dname not in sd.datasets().keys():
            continue
        sds = sd.select(dname)

        # get (masked) data
        d = np.where(sds[:] != sds.getfillvalue(), sds[:], np.nan)

        # check for more masks
        if 'missing' in sds.attributes():
            d[d == sds.missing] = np.nan

        # unpack data
        if 'offset' in sds.attributes() and 'factor' in sds.attributes():
            d = d / sds.factor + sds.offset

        # coordinate variables...how to do this?! Look for VDATA?
        # just save as DataArray for now, without coordinate variables...
        dims = [sds.dim(i).info()[0] for i in range(len(sds.dimensions()))]
        da_dict[dname] = xr.DataArray(d, dims=dims, attrs=sds.attributes(), name=dname)

        # Close this dataset
        sds.endaccess()

    # Close file
    sd.end()

    # ...now read VDATA...
    hdf = HDF(filename)
    vs = hdf.vstart()

    if variables is None:
        data_vars, *__ = zip(*vs.vdatainfo())
    else:
        data_vars = variables

    for vname in data_vars:
        if vname in drop_variables:
            continue
        if vname not in [v[0] for v in vs.vdatainfo()]:
            continue

        # attach vdata
        vd = vs.attach(vname)

        # get vdata info
        nrec, mode, fields, *__ = vd.inquire()
        if nrec == 0:
            vd.detach()
            continue

        # read data
        d = np.array(vd[:]).squeeze()

        # make sure not to overwrite coordinate variables
        if all([vname not in da.dims for v, da in da_dict.items()]):
            da_dict[vname] = xr.DataArray(d)

        vd.detach()

    # clean up
    vs.end()
    # HDF files do not always close cleanly, so close manually
    hdf.close()

    return xr.Dataset(da_dict)
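# A hedged usage sketch for open_dataset_hdf; the file and variable names
# below are placeholders, not guaranteed to exist in any particular product.
ds = open_dataset_hdf("granule.hdf", drop_variables=["DEM_elevation"])
print(ds)  # xarray.Dataset combining SDS and vdata variables
subset = open_dataset_hdf("granule.hdf", variables=["Latitude", "Longitude"])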
class HdfFile(GenericFile):

    def __init__(self, file_path, level, additional_param=None):
        GenericFile.__init__(self, file_path, level)
        self.handler_id = "hdf2."
        self.FILE_FORMAT = "hdf2."
        #hdf = None
        #vs = None
        #v = None

    def get_handler_id(self):
        return self.handler_id

    def _get_coords(self, vs, fn):
        """
        Iterate through vgroup and return a list of coordinates (if existing).

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing geospatial information.
        """
        mappings = {
            "NVlat2": "Latitude",
            "NVlng2": "Longitude",
        }

        coords = {}
        for k, v in mappings.iteritems():
            ref = vs.find(k)
            vd = vs.attach(ref)

            coords[v] = []
            while True:
                try:
                    coord = float(vd.read()[0][0])
                    coord /= 10**7
                    coords[v].append(coord)
                except HDF4Error:  # End of file
                    break

            vd.detach()

        return coords

    def _get_temporal(self, vs, fn):
        """
        Return start and end timestamps (if existing)

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing temporal information.
        """
        mappings = {
            "MIdate": "date",
            "MIstime": "start_time",
            "MIetime": "end_time",
        }

        timestamps = {}
        for k, v in mappings.iteritems():
            ref = vs.find(k)
            vd = vs.attach(ref)

            timestamps[v] = []
            while True:
                try:
                    timestamps[v].append(vd.read()[0][0])
                except HDF4Error:  # EOF
                    break

            vd.detach()

        # This list comprehension basically converts from a list of integers
        # into a list of chars and joins them together to make strings
        # ...
        # If unclear - HDF text data comes out as a list of integers, e.g.:
        # 72 101 108 108 111 32 119 111 114 108 100 (this means "Hello world")
        # Those "char" numbers get converted to strings with this snippet.
        dates = [chr(x) for x in timestamps["date"] if x != 0]
        timestamps["date"] = ''.join(dates)

        return self._parse_timestamps(timestamps)

    def _parse_timestamps(self, tm_dict):
        """
        Parse start and end timestamps from an HDF4 file.

        :param dict tm_dict: The timestamp to be parsed
        :returns: Dict containing start and end timestamps
        """
        st_base = ("%s %s" % (tm_dict["date"], tm_dict["start_time"][0]))
        et_base = ("%s %s" % (tm_dict["date"], tm_dict["end_time"][0]))

        for t_format in ["%d/%m/%y %H%M%S", "%d/%m/%Y %H%M%S"]:
            try:
                start_time = datetime.datetime.strptime(st_base, t_format)
                end_time = datetime.datetime.strptime(et_base, t_format)
            except ValueError:
                # ValueError will be raised if strptime format doesn't match
                # the actual timestamp - so just try the next strptime format
                continue

        return {"start_time": start_time.isoformat(),
                "end_time": end_time.isoformat()}

    def get_geospatial(self):
        """
        Search through HDF4 file, returning a list of coordinates from the
        'Navigation' vgroup (if it exists).

        :returns: Dict containing geospatial information.
        """
        ref = -1

        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)

                if vg._name == "Navigation":
                    geospatial = self._get_coords(self.vs, self.file_path)
                    geospatial["type"] = "track"  # Type annotation
                    vg.detach()
                    return geospatial

                vg.detach()
            except HDF4Error:  # End of file
                # This is a weird way of handling files, but this is what the
                # pyhdf library demonstrates...
                break

        return None

    def get_temporal(self):
        """
        Search through HDF4 file, returning timestamps from the 'Mission'
        vgroup (if it exists)

        :returns: List containing temporal metadata
        """
        ref = -1

        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)

                if vg._name == "Mission":
                    temporal = self._get_temporal(self.vs, self.file_path)
                    vg.detach()
                    return temporal

                vg.detach()
            except HDF4Error:  # End of file
                # This 'except at end of file' thing is some pyhdf weirdness
                # Check the pyhdf documentation for clarification
                break

        return None

    def get_phenomena(self, fp):
        phen_list = []
        return phen_list

    def get_metadata_badccsv_level2(self):
        return None

    def get_geolocation(self):
        # Open file.
        hdf = SD(self.file_path, SDC.READ)

        # List available SDS datasets.
        datasets = hdf.datasets()

        # Read dataset.
        #DATAFIELD_NAME='RelHumid_A'
        #data3D = hdf.select(DATAFIELD_NAME)
        #data = data3D[11,:,:]

        # Read geolocation dataset.
        try:
            lat = hdf.select('Latitude')
            latitude = lat[:,:].flatten()
            lon = hdf.select('Longitude')
            longitude = lon[:,:].flatten()
            return (latitude, longitude)
        except HDF4Error:
            return None

    def normalize_coord(self, coord):
        if coord < -180:
            coord = 0
        return coord

    def get_metadata_badccsv_level3(self):
        self.handler_id = "Hdf handler level 3."
        spatial = None

        file_info = self.get_metadata_generic_level1()

        #First method for extracting information.
        self.hdf = HDF(self.file_path)
        self.vs = self.hdf.vstart()
        self.v = self.hdf.vgstart()

        geospatial = self.get_geospatial()
        temporal = self.get_temporal()

        if geospatial is not None:
            lat_u = self.normalize_coord(float(max(geospatial["Latitude"])))
            lat_l = self.normalize_coord(float(min(geospatial["Latitude"])))

            lon_u = self.normalize_coord(float(max(geospatial["Longitude"])))
            lon_l = self.normalize_coord(float(min(geospatial["Longitude"])))

            spatial = {"coordinates": {"type": "envelope",
                                       "coordinates": [[round(lon_l, 3), round(lat_l, 3)],
                                                       [round(lon_u, 3), round(lat_u, 3)]]}}
        else:
            #Second method.
            geospatial = self.get_geolocation()

            if geospatial is not None:
                lat_u = self.normalize_coord(float(max(geospatial[0])))
                lat_l = self.normalize_coord(float(min(geospatial[0])))

                lon_u = self.normalize_coord(float(max(geospatial[1])))
                lon_l = self.normalize_coord(float(min(geospatial[1])))

                spatial = {"coordinates": {"type": "envelope",
                                           "coordinates": [[round(lon_l, 3), round(lat_l, 3)],
                                                           [round(lon_u, 3), round(lat_u, 3)]]}}

        if temporal is not None:
            file_info[0]["info"]["temporal"] = {"start_time": temporal["start_time"],
                                                "end_time": temporal["end_time"]}

        return file_info + (None, spatial, )

    def get_metadata(self):
        if self.level == "1":
            res = self.get_metadata_generic_level1()
        elif self.level == "2":
            res = self.get_metadata_generic_level1()
        elif self.level == "3":
            res = self.get_metadata_badccsv_level3()

        res[0]["info"]["format"] = self.FILE_FORMAT

        return res

    def __enter__(self):
        return self

    def __exit__(self, *args):
        pass