Exemplo n.º 1
0
def dump_cloudsat(filename):
    """
    walk the hdf file and print out
    information about each vgroup and vdata
    object

    Parameters
    ----------

    filename: str or Path object
        name of hdf file

    Returns
    -------

    None; information is printed to stdout
    """
    # Local import keeps this function self-contained.
    from contextlib import ExitStack

    filename = str(filename)

    # Every handle is registered for cleanup as soon as it is opened, so the
    # file and its interfaces are released even if describevg() raises.
    # Callbacks run in reverse order: V, VS, SD, then the file itself.
    with ExitStack() as stack:
        hdf = HDF(filename)
        stack.callback(hdf.close)

        # Initialize the SD, V and VS interfaces on the file.
        sd = SD(filename)
        stack.callback(sd.end)
        vs = hdf.vstart()
        stack.callback(vs.end)
        v = hdf.vgstart()
        stack.callback(v.end)

        # Scan all vgroups in the file; getid(-1) yields the first vgroup
        # and HDF4Error signals that there are no more.
        ref = -1
        while True:
            try:
                ref = v.getid(ref)
            except HDF4Error:    # no more vgroup
                break
            print('vgroup: ', ref)
            describevg(ref, v, vs, sd)
    return None
Exemplo n.º 2
0
class HDF4(_geospatial):
    """
    HDF4 context manager class.

    Entering the context opens the file together with its VS and V
    interfaces; leaving it closes them again.  The ``get_*`` methods use
    those handles, so they must be called inside the ``with`` block.
    """
    # Handles created by __enter__ and released by __exit__.
    hdf = None
    vs = None
    v = None

    def __init__(self, fname):
        """
        :param str fname: The path of the HDF4 file.
        """
        self.fname = str(fname)

    def __enter__(self):
        """
        Open HDF file and interfaces for use as context manager.

        :returns: Self.
        """
        self.hdf = HDF(self.fname)
        try:
            self.vs = self.hdf.vstart()
            self.v = self.hdf.vgstart()
        except Exception:
            # __exit__ is not invoked when __enter__ raises, so release
            # whatever was already opened before propagating the error.
            self.__exit__(None, None, None)
            raise

        return self

    def __exit__(self, *args):
        """
        Close interfaces and HDF file after finishing use in context manager.
        """
        try:
            if self.v is not None:
                self.v.end()
            if self.vs is not None:
                self.vs.end()
        finally:
            # Close the file even if ending one of the interfaces failed.
            if self.hdf is not None:
                self.hdf.close()
            self.v = self.vs = self.hdf = None

    @staticmethod
    def _read_first_values(vd):
        """
        Read an attached vdata record by record until pyhdf signals the end.

        :param vd: Attached vdata object
        :returns: List with the first field of every record
        """
        values = []
        while True:
            try:
                values.append(vd.read()[0][0])
            except HDF4Error:  # End of file
                break
        return values

    def _read_from_vgroup(self, name, reader):
        """
        Walk all vgroups and call ``reader`` when one called ``name`` exists.

        :param str name: Name of the vgroup to look for
        :param reader: Callable taking ``(vs, fname)``
        :returns: Result of ``reader``, or None if the vgroup was not found
        """
        ref = -1
        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)
                try:
                    if vg._name == name:
                        return reader(self.vs, self.fname)
                finally:
                    # Detach on every path, including errors in ``reader``.
                    vg.detach()
            except HDF4Error:  # End of file
                # pyhdf reports "no more vgroups" by raising; any other
                # HDF4Error raised while reading is treated the same way.
                break

        return None

    def _get_coords(self, vs, fn):
        """
        Iterate through vgroup and return a list of coordinates (if existing).

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing geospatial information.
        """
        mappings = {
            "NVlat2": "lat",
            "NVlng2": "lon",
        }

        coords = {}
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for vdata_name, key in mappings.items():
            vd = vs.attach(vs.find(vdata_name))
            try:
                # Stored values are divided by 10**7 (presumably fixed-point
                # degrees - confirm against the file format).
                coords[key] = [float(value) / 10**7
                               for value in self._read_first_values(vd)]
            finally:
                vd.detach()
        return coords

    def _get_temporal(self, vs, fn):
        """
        Return start and end timestamps (if existing)

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing temporal information.
        """
        mappings = {
            "MIdate": "date",
            "MIstime": "start_time",
            "MIetime": "end_time",
        }

        timestamps = {}
        for vdata_name, key in mappings.items():
            vd = vs.attach(vs.find(vdata_name))
            try:
                timestamps[key] = self._read_first_values(vd)
            finally:
                vd.detach()

        # HDF text data comes out as a list of integers, e.g.:
        # 72 101 108 108 111 32 119 111 114 108 100 (this means "Hello world")
        # Convert those "char" numbers to a string, dropping NUL padding.
        dates = [chr(x) for x in timestamps["date"] if x != 0]
        timestamps["date"] = ''.join(dates)

        return self._parse_timestamps(timestamps)

    def _parse_timestamps(self, tm_dict):
        """
        Parse start and end timestamps from an HDF4 file.

        :param dict tm_dict: The timestamp to be parsed
        :returns: Dict containing start and end timestamps
        :raises ValueError: If neither supported format matches
        """
        st_base = ("%s %s" % (tm_dict["date"], tm_dict["start_time"][0]))
        et_base = ("%s %s" % (tm_dict["date"], tm_dict["end_time"][0]))

        for t_format in ("%d/%m/%y %H%M%S", "%d/%m/%Y %H%M%S"):
            try:
                start_time = datetime.datetime.strptime(st_base, t_format)
                end_time = datetime.datetime.strptime(et_base, t_format)
            except ValueError:
                # ValueError will be raised if strptime format doesn't match
                # the actual timestamp - so just try the next strptime format
                continue

            # Both timestamps parsed with the same format: done.
            return {"start_time": start_time.isoformat(),
                    "end_time": end_time.isoformat()}

        # Fail with a clear message instead of an UnboundLocalError.
        raise ValueError("Unrecognised timestamp format: %r / %r"
                         % (st_base, et_base))

    def get_geospatial(self):
        """
        Search through HDF4 file, returning a list of coordinates from the
        'Navigation' vgroup (if it exists).

        :returns: Dict containing geospatial information.
        """
        geospatial = self._read_from_vgroup("Navigation", self._get_coords)
        if geospatial is not None:
            geospatial["type"] = "track"  # Type annotation
        return geospatial

    def get_temporal(self):
        """
        Search through HDF4 file, returning timestamps from the 'Mission'
        vgroup (if it exists)

        :returns: Dict containing temporal metadata, or None
        """
        return self._read_from_vgroup("Mission", self._get_temporal)

    def get_properties(self):
        """
        Returns ceda_di.metadata.properties.Properties object
        containing geospatial and temporal metadata from file.

        :returns: Metadata.product.Properties object
        """
        geospatial = self.get_geospatial()
        temporal = self.get_temporal()
        filesystem = super(HDF4, self).get_filesystem(self.fname)
        data_format = {
            "format": "HDF4",
        }

        # Instrument and flight details are derived from the file name.
        instrument = arsf.Hyperspectral.get_instrument(filesystem["filename"])
        flight_info = arsf.Hyperspectral.get_flight_info(filesystem["filename"])
        props = product.Properties(spatial=geospatial,
                                   temporal=temporal,
                                   filesystem=filesystem,
                                   data_format=data_format,
                                   instrument=instrument,
                                   flight_info=flight_info)

        return props
Exemplo n.º 3
0
class HdfFile(GenericFile):
    """
    Handler that extracts format, temporal and spatial metadata from an
    HDF4 file, at the detail level requested on construction.
    """

    def __init__(self, file_path, level, additional_param=None):
        """
        :param file_path: Path of the HDF file (forwarded to GenericFile)
        :param level: Metadata level; get_metadata() accepts "1", "2", "3"
        :param additional_param: Unused by this handler
        """
        GenericFile.__init__(self, file_path, level)
        self.handler_id = "hdf2."
        self.FILE_FORMAT = "hdf2."
        # pyhdf handles; opened by get_metadata_badccsv_level3() and
        # released by __exit__().
        self.hdf = None
        self.vs = None
        self.v = None

    def get_handler_id(self):
        """Return the identifier string of this handler."""
        return self.handler_id

    def _close_hdf(self):
        """
        End the V and VS interfaces and close the HDF file, if open.

        Safe to call repeatedly; the handle attributes are reset to None.
        """
        v = getattr(self, "v", None)
        vs = getattr(self, "vs", None)
        hdf = getattr(self, "hdf", None)
        try:
            if v is not None:
                v.end()
            if vs is not None:
                vs.end()
        finally:
            # Close the file even if ending one of the interfaces failed.
            if hdf is not None:
                hdf.close()
            self.v = self.vs = self.hdf = None

    @staticmethod
    def _read_first_values(vd):
        """
        Read an attached vdata record by record until pyhdf signals the end.

        :param vd: Attached vdata object
        :returns: List with the first field of every record
        """
        values = []
        while True:
            try:
                values.append(vd.read()[0][0])
            except HDF4Error:  # End of file
                break
        return values

    def _read_from_vgroup(self, name, reader):
        """
        Walk all vgroups and call ``reader`` when one called ``name`` exists.

        :param str name: Name of the vgroup to look for
        :param reader: Callable taking ``(vs, file_path)``
        :returns: Result of ``reader``, or None if the vgroup was not found
        """
        ref = -1
        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)
                try:
                    if vg._name == name:
                        return reader(self.vs, self.file_path)
                finally:
                    # Detach on every path, including errors in ``reader``.
                    vg.detach()
            except HDF4Error:  # End of file
                # pyhdf reports "no more vgroups" by raising; any other
                # HDF4Error raised while reading is treated the same way.
                break

        return None

    def _get_coords(self, vs, fn):
        """
        Iterate through vgroup and return a list of coordinates (if existing).
        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing geospatial information.
        """
        mappings = {
            "NVlat2": "Latitude",
            "NVlng2": "Longitude",
        }

        coords = {}
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for vdata_name, key in mappings.items():
            vd = vs.attach(vs.find(vdata_name))
            try:
                # Stored values are divided by 10**7 (presumably fixed-point
                # degrees - confirm against the file format).
                coords[key] = [float(value) / 10**7
                               for value in self._read_first_values(vd)]
            finally:
                vd.detach()
        return coords

    def _get_temporal(self, vs, fn):
        """
        Return start and end timestamps (if existing)
        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing temporal information.
        """
        mappings = {
            "MIdate": "date",
            "MIstime": "start_time",
            "MIetime": "end_time",
        }

        timestamps = {}
        for vdata_name, key in mappings.items():
            vd = vs.attach(vs.find(vdata_name))
            try:
                timestamps[key] = self._read_first_values(vd)
            finally:
                vd.detach()

        # HDF text data comes out as a list of integers, e.g.:
        # 72 101 108 108 111 32 119 111 114 108 100 (this means "Hello world")
        # Convert those "char" numbers to a string, dropping NUL padding.
        dates = [chr(x) for x in timestamps["date"] if x != 0]
        timestamps["date"] = ''.join(dates)

        return self._parse_timestamps(timestamps)

    def _parse_timestamps(self, tm_dict):
        """
        Parse start and end timestamps from an HDF4 file.
        :param dict tm_dict: The timestamp to be parsed
        :returns: Dict containing start and end timestamps
        :raises ValueError: If neither supported format matches
        """
        st_base = ("%s %s" % (tm_dict["date"], tm_dict["start_time"][0]))
        et_base = ("%s %s" % (tm_dict["date"], tm_dict["end_time"][0]))

        for t_format in ("%d/%m/%y %H%M%S", "%d/%m/%Y %H%M%S"):
            try:
                start_time = datetime.datetime.strptime(st_base, t_format)
                end_time = datetime.datetime.strptime(et_base, t_format)
            except ValueError:
                # ValueError will be raised if strptime format doesn't match
                # the actual timestamp - so just try the next strptime format
                continue

            # Both timestamps parsed with the same format: done.
            return {"start_time": start_time.isoformat(),
                    "end_time": end_time.isoformat()}

        # Fail with a clear message instead of an UnboundLocalError.
        raise ValueError("Unrecognised timestamp format: %r / %r"
                         % (st_base, et_base))

    def get_geospatial(self):
        """
        Search through HDF4 file, returning a list of coordinates from the
        'Navigation' vgroup (if it exists).
        :returns: Dict containing geospatial information.
        """
        geospatial = self._read_from_vgroup("Navigation", self._get_coords)
        if geospatial is not None:
            geospatial["type"] = "track"  # Type annotation
        return geospatial

    def get_temporal(self):
        """
        Search through HDF4 file, returning timestamps from the 'Mission'
        vgroup (if it exists)
        :returns: Dict containing temporal metadata, or None
        """
        return self._read_from_vgroup("Mission", self._get_temporal)

    def get_phenomena(self, fp):
        """Return the list of phenomena; always empty for this handler."""
        phen_list = []
        return phen_list

    def get_metadata_badccsv_level2(self):
        """Level 2 metadata is not produced by this handler."""
        return None

    def get_geolocation(self):
        """
        Read the 'Latitude' and 'Longitude' datasets through the SD interface.

        :returns: Tuple ``(latitude, longitude)`` of flattened arrays, or
                  None if either dataset cannot be read.
        """
        # Open file.
        hdf = SD(self.file_path, SDC.READ)

        # Read geolocation dataset.
        try:
            latitude = hdf.select('Latitude')[:, :].flatten()
            longitude = hdf.select('Longitude')[:, :].flatten()
            return (latitude, longitude)
        except HDF4Error:
            return None
        finally:
            # The data has been copied out above, so the handle can go.
            hdf.end()

    def normalize_coord(self, coord):
        """
        Replace values below -180 with 0 (presumably fill values - confirm).

        :param coord: Coordinate value
        :returns: ``coord``, or 0 when it is below -180
        """
        if coord < -180:
            coord = 0

        return coord

    def _build_envelope(self, latitudes, longitudes):
        """
        Build the bounding "envelope" of the given coordinates.

        :param latitudes: Sequence of latitude values
        :param longitudes: Sequence of longitude values
        :returns: Envelope dict (corners rounded to 3 decimals), or None
                  when either sequence is empty.
        """
        if len(latitudes) == 0 or len(longitudes) == 0:
            return None

        lat_u = self.normalize_coord(float(max(latitudes)))
        lat_l = self.normalize_coord(float(min(latitudes)))

        lon_u = self.normalize_coord(float(max(longitudes)))
        lon_l = self.normalize_coord(float(min(longitudes)))

        return {
            "coordinates": {
                "type": "envelope",
                "coordinates": [[round(lon_l, 3), round(lat_l, 3)],
                                [round(lon_u, 3), round(lat_u, 3)]],
            }
        }

    def get_metadata_badccsv_level3(self):
        """
        Collect level 1 metadata plus temporal and spatial extent.

        The handles opened here stay on the instance until __exit__ (or the
        next call of this method) releases them.

        :returns: The level 1 result extended with ``(None, spatial)``
        """
        self.handler_id = "Hdf handler level 3."
        spatial = None

        file_info = self.get_metadata_generic_level1()

        # Release handles left over from an earlier call before reopening.
        self._close_hdf()

        # First method for extracting information.
        self.hdf = HDF(self.file_path)
        self.vs = self.hdf.vstart()
        self.v = self.hdf.vgstart()

        geospatial = self.get_geospatial()
        temporal = self.get_temporal()

        if geospatial is not None:
            spatial = self._build_envelope(geospatial["Latitude"],
                                           geospatial["Longitude"])
        else:
            # Second method.
            geospatial = self.get_geolocation()

            if geospatial is not None:
                spatial = self._build_envelope(geospatial[0], geospatial[1])

        if temporal is not None:
            file_info[0]["info"]["temporal"] = {
                "start_time": temporal["start_time"],
                "end_time": temporal["end_time"],
            }

        return file_info + (None, spatial, )

    def get_metadata(self):
        """
        Return the metadata for the level given at construction.

        :returns: Metadata with ``info.format`` set to this handler's format
        :raises ValueError: If the level is not "1", "2" or "3"
        """
        if self.level in ("1", "2"):
            res = self.get_metadata_generic_level1()
        elif self.level == "3":
            res = self.get_metadata_badccsv_level3()
        else:
            # Fail with a clear message instead of an UnboundLocalError.
            raise ValueError("Unsupported metadata level: %r" % (self.level,))

        res[0]["info"]["format"] = self.FILE_FORMAT

        return res

    def __enter__(self):
        """Return self; files are opened lazily by the metadata methods."""
        return self

    def __exit__(self, *args):
        """Release any pyhdf handles opened while extracting metadata."""
        self._close_hdf()
Exemplo n.º 4
0
def HDFread(filename, variable, vgroup=None):
    """
    Extract the data for non-scientific data in V mode of hdf file

    Parameters
    ----------

    filename: str or Path object
        name of hdf file
    variable: str
        name of the vdata to read
    vgroup: str, optional
        name of the vgroup holding the vdata; defaults to
        'Geolocation Fields'

    Returns
    -------

    numpy array built from all records of the vdata

    Raises
    ------

    KeyError
        if the file has no vgroup called ``vgroup``
    ValueError
        if the vgroup has no vdata called ``variable``
    """
    # Local import keeps this function self-contained.
    from contextlib import ExitStack

    if vgroup is None:
        vgroup = 'Geolocation Fields'

    filename = str(filename)

    # Handles are registered for cleanup as soon as they are opened, so they
    # are released on the error paths too (callbacks run V, VS, then file).
    # The SD interface is not needed for vdata access and is not opened.
    with ExitStack() as stack:
        hdf = HDF(filename, HC.READ)
        stack.callback(hdf.close)
        vs = hdf.vstart()
        stack.callback(vs.end)
        v = hdf.vgstart()
        stack.callback(v.end)

        # Index every vgroup in the file by name.
        vg_dict = {}
        ref = -1
        while True:
            try:
                ref = v.getid(ref)
            except HDF4Error:    # no more vgroup
                break
            vg = v.attach(ref)
            try:
                vg_dict[vg._name] = (vg._tag, vg._refnum)
            finally:
                vg.detach()

        try:
            _, group_ref = vg_dict[vgroup]
        except KeyError:
            raise KeyError(
                f'vgroup {vgroup!r} not found in {filename}; '
                f'available vgroups: {sorted(vg_dict)}') from None

        # All members (tag, ref pairs) of the requested vgroup.
        vg = v.attach(group_ref)
        try:
            members = vg.tagrefs()
        finally:
            vg.detach()

        # Inspect only the vdata members.  The ref is remembered together
        # with the name: positions in ``names`` do not line up with positions
        # in ``members`` when the vgroup also holds non-vdata objects.
        names = []
        match = None
        for tag, member_ref in members:
            if tag != HC.DFTAG_VH:
                continue
            vd = vs.attach(member_ref)
            try:
                nrec, _intmode, _fields, _size, name = vd.inquire()
            finally:
                vd.detach()
            names.append(name)
            if match is None and name == variable:
                match = (member_ref, nrec)

        if match is None:
            error = f'{variable} is not in {names} for vgroup {vgroup}'
            raise ValueError(error)

        member_ref, nrec = match
        var = vs.attach(member_ref)
        try:
            V = var.read(nrec)
        finally:
            var.detach()
        return np.asarray(V)
Exemplo n.º 5
0
class HDF4(_geospatial):
    """
    HDF4 context manager class.

    Entering the context opens the file together with its VS and V
    interfaces; leaving it closes them again.  The ``get_*`` methods use
    those handles, so they must be called inside the ``with`` block.
    """
    # Handles created by __enter__ and released by __exit__.
    hdf = None
    vs = None
    v = None

    def __init__(self, fname):
        """
        :param str fname: The path of the HDF4 file.
        """
        self.fname = str(fname)

    def __enter__(self):
        """
        Open HDF file and interfaces for use as context manager.

        :returns: Self.
        """
        self.hdf = HDF(self.fname)
        try:
            self.vs = self.hdf.vstart()
            self.v = self.hdf.vgstart()
        except Exception:
            # __exit__ is not invoked when __enter__ raises, so release
            # whatever was already opened before propagating the error.
            self.__exit__(None, None, None)
            raise

        return self

    def __exit__(self, *args):
        """
        Close interfaces and HDF file after finishing use in context manager.
        """
        try:
            if self.v is not None:
                self.v.end()
            if self.vs is not None:
                self.vs.end()
        finally:
            # Close the file even if ending one of the interfaces failed.
            if self.hdf is not None:
                self.hdf.close()
            self.v = self.vs = self.hdf = None

    @staticmethod
    def _read_first_values(vd):
        """
        Read an attached vdata record by record until pyhdf signals the end.

        :param vd: Attached vdata object
        :returns: List with the first field of every record
        """
        values = []
        while True:
            try:
                values.append(vd.read()[0][0])
            except HDF4Error:  # End of file
                break
        return values

    def _read_from_vgroup(self, name, reader):
        """
        Walk all vgroups and call ``reader`` when one called ``name`` exists.

        :param str name: Name of the vgroup to look for
        :param reader: Callable taking ``(vs, fname)``
        :returns: Result of ``reader``, or None if the vgroup was not found
        """
        ref = -1
        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)
                try:
                    if vg._name == name:
                        return reader(self.vs, self.fname)
                finally:
                    # Detach on every path, including errors in ``reader``.
                    vg.detach()
            except HDF4Error:  # End of file
                # pyhdf reports "no more vgroups" by raising; any other
                # HDF4Error raised while reading is treated the same way.
                break

        return None

    def _get_coords(self, vs, fn):
        """
        Iterate through vgroup and return a list of coordinates (if existing).

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing geospatial information.
        """
        mappings = {
            "NVlat2": "lat",
            "NVlng2": "lon",
        }

        coords = {}
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for vdata_name, key in mappings.items():
            vd = vs.attach(vs.find(vdata_name))
            try:
                # Stored values are divided by 10**7 (presumably fixed-point
                # degrees - confirm against the file format).
                coords[key] = [float(value) / 10**7
                               for value in self._read_first_values(vd)]
            finally:
                vd.detach()
        return coords

    def _get_temporal(self, vs, fn):
        """
        Return start and end timestamps (if existing)

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing temporal information.
        """
        mappings = {
            "MIdate": "date",
            "MIstime": "start_time",
            "MIetime": "end_time",
        }

        timestamps = {}
        for vdata_name, key in mappings.items():
            vd = vs.attach(vs.find(vdata_name))
            try:
                timestamps[key] = self._read_first_values(vd)
            finally:
                vd.detach()

        # HDF text data comes out as a list of integers, e.g.:
        # 72 101 108 108 111 32 119 111 114 108 100 (this means "Hello world")
        # Convert those "char" numbers to a string, dropping NUL padding.
        dates = [chr(x) for x in timestamps["date"] if x != 0]
        timestamps["date"] = ''.join(dates)

        return self._parse_timestamps(timestamps)

    def _parse_timestamps(self, tm_dict):
        """
        Parse start and end timestamps from an HDF4 file.

        :param dict tm_dict: The timestamp to be parsed
        :returns: Dict containing start and end timestamps
        :raises ValueError: If neither supported format matches
        """
        st_base = ("%s %s" % (tm_dict["date"], tm_dict["start_time"][0]))
        et_base = ("%s %s" % (tm_dict["date"], tm_dict["end_time"][0]))

        for t_format in ("%d/%m/%y %H%M%S", "%d/%m/%Y %H%M%S"):
            try:
                start_time = datetime.datetime.strptime(st_base, t_format)
                end_time = datetime.datetime.strptime(et_base, t_format)
            except ValueError:
                # ValueError will be raised if strptime format doesn't match
                # the actual timestamp - so just try the next strptime format
                continue

            # Both timestamps parsed with the same format: done.
            return {
                "start_time": start_time.isoformat(),
                "end_time": end_time.isoformat()
            }

        # Fail with a clear message instead of an UnboundLocalError.
        raise ValueError("Unrecognised timestamp format: %r / %r"
                         % (st_base, et_base))

    def get_geospatial(self):
        """
        Search through HDF4 file, returning a list of coordinates from the
        'Navigation' vgroup (if it exists).

        :returns: Dict containing geospatial information.
        """
        geospatial = self._read_from_vgroup("Navigation", self._get_coords)
        if geospatial is not None:
            geospatial["type"] = "track"  # Type annotation
        return geospatial

    def get_temporal(self):
        """
        Search through HDF4 file, returning timestamps from the 'Mission'
        vgroup (if it exists)

        :returns: Dict containing temporal metadata, or None
        """
        return self._read_from_vgroup("Mission", self._get_temporal)

    def get_properties(self):
        """
        Returns ceda_di.metadata.properties.Properties object
        containing geospatial and temporal metadata from file.

        :returns: Metadata.product.Properties object
        """
        geospatial = self.get_geospatial()
        temporal = self.get_temporal()
        filesystem = super(HDF4, self).get_filesystem(self.fname)
        data_format = {
            "format": "HDF4",
        }

        # Instrument and flight details are derived from the file name.
        instrument = arsf.Hyperspectral.get_instrument(filesystem["filename"])
        flight_info = arsf.Hyperspectral.get_flight_info(
            filesystem["filename"])
        props = product.Properties(spatial=geospatial,
                                   temporal=temporal,
                                   filesystem=filesystem,
                                   data_format=data_format,
                                   instrument=instrument,
                                   flight_info=flight_info)

        return props