def _create_coord_list(self, filenames):
    from cis.time_util import cis_standard_time_unit

    # list of coordinate variables we are interested in
    variables = ['Latitude', 'Longitude', 'TAI_start', 'Profile_time', 'Height']

    # reading the various files
    try:
        logging.info("Listing coordinates: " + str(variables))
        sdata, vdata = hdf.read(filenames, variables)

        # altitude coordinate
        height = sdata['Height']
        height_data = hdf.read_data(height, "SD")
        height_metadata = hdf.read_metadata(height, "SD")
        height_coord = Coord(height_data, height_metadata, "Y")

    except InvalidVariableError:
        # This means we are reading a Cloudsat file without height, so remove height from the variables list
        variables.remove('Height')
        logging.info("Listing coordinates: " + str(variables))
        sdata, vdata = hdf.read(filenames, variables)
        height_data = None
        height_coord = None

    # latitude
    lat = vdata['Latitude']
    lat_data = hdf.read_data(lat, "VD")
    if height_data is not None:
        lat_data = utils.expand_1d_to_2d_array(lat_data, len(height_data[0]), axis=1)
    lat_metadata = hdf.read_metadata(lat, "VD")
    lat_metadata.shape = lat_data.shape
    lat_coord = Coord(lat_data, lat_metadata)

    # longitude
    lon = vdata['Longitude']
    lon_data = hdf.read_data(lon, "VD")
    if height_data is not None:
        lon_data = utils.expand_1d_to_2d_array(lon_data, len(height_data[0]), axis=1)
    lon_metadata = hdf.read_metadata(lon, "VD")
    lon_metadata.shape = lon_data.shape
    lon_coord = Coord(lon_data, lon_metadata)

    # time coordinate
    time_data = self._generate_time_array(vdata)
    if height_data is not None:
        time_data = utils.expand_1d_to_2d_array(time_data, len(height_data[0]), axis=1)
    time_coord = Coord(time_data, Metadata(name='Profile_time', standard_name='time', shape=time_data.shape,
                                           units=str(cis_standard_time_unit),
                                           calendar=cis_standard_time_unit.calendar), "X")

    # create object containing list of coordinates
    coords = CoordList()
    coords.append(lat_coord)
    coords.append(lon_coord)
    if height_coord is not None:
        coords.append(height_coord)
    coords.append(time_coord)

    return coords

def create_data_object(self, filenames, variable):
    logging.debug("Creating data object for variable " + variable)

    # reading coordinates
    coords = self._create_coord_list(filenames)

    # reading of variables
    sdata, vdata = hdf.read(filenames, variable)

    # missing values
    missing_values = [0, -9999, -4444, -3333]

    # retrieve data + its metadata
    if variable in vdata:
        # vdata should be expanded in the same way as the coordinates are expanded
        try:
            height_length = coords.get_coord('Height').shape[1]
            var = utils.expand_1d_to_2d_array(hdf.read_data(vdata[variable], "VD", missing_values),
                                              height_length, axis=1)
        except CoordinateNotFoundError:
            var = hdf.read_data(vdata[variable], "VD", missing_values)
        metadata = hdf.read_metadata(vdata[variable], "VD")
    elif variable in sdata:
        var = hdf.read_data(sdata[variable], "SD", missing_values)
        metadata = hdf.read_metadata(sdata[variable], "SD")
    else:
        raise ValueError("variable not found")

    return UngriddedData(var, metadata, coords)

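# Illustration (not part of the product code): utils.expand_1d_to_2d_array tiles a
# 1-D per-profile coordinate along a new axis so it matches the 2-D
# (profile, height) data array. A minimal NumPy sketch of the assumed behaviour:

import numpy as np

def expand_1d_to_2d_sketch(arr, length, axis=1):
    """Hypothetical equivalent of utils.expand_1d_to_2d_array (illustrative only)."""
    return np.repeat(np.expand_dims(arr, axis), length, axis=axis)

# A per-profile latitude of shape (2,) becomes (2, 3) for 3 height levels:
# expand_1d_to_2d_sketch(np.array([10., 20.]), 3) ->
#     [[10., 10., 10.],
#      [20., 20., 20.]]
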
def _create_coord_list(self, filenames, variable=None): import datetime as dt variables = ["Latitude", "Longitude", "Scan_Start_Time"] logging.info("Listing coordinates: " + str(variables)) sdata, vdata = hdf.read(filenames, variables) apply_interpolation = False if variable is not None: scale = self.__get_data_scale(filenames[0], variable) apply_interpolation = True if scale is "1km" else False lat = sdata["Latitude"] sd_lat = hdf.read_data(lat, "SD") lat_data = self.__field_interpolate(sd_lat) if apply_interpolation else sd_lat lat_metadata = hdf.read_metadata(lat, "SD") lat_coord = Coord(lat_data, lat_metadata, "Y") lon = sdata["Longitude"] lon_data = ( self.__field_interpolate(hdf.read_data(lon, "SD")) if apply_interpolation else hdf.read_data(lon, "SD") ) lon_metadata = hdf.read_metadata(lon, "SD") lon_coord = Coord(lon_data, lon_metadata, "X") time = sdata["Scan_Start_Time"] time_metadata = hdf.read_metadata(time, "SD") # Ensure the standard name is set time_metadata.standard_name = "time" time_coord = Coord(time, time_metadata, "T") time_coord.convert_TAI_time_to_std_time(dt.datetime(1993, 1, 1, 0, 0, 0)) return CoordList([lat_coord, lon_coord, time_coord])
def _create_coord_list(self, filenames):
    import numpy as np
    from cis.time_util import calculate_mid_time, cis_standard_time_unit

    variables = ["XDim", "YDim"]
    logging.info("Listing coordinates: " + str(variables))

    sdata, vdata = hdf.read(filenames, variables)

    lat = sdata["YDim"]
    lat_metadata = hdf.read_metadata(lat, "SD")
    lon = sdata["XDim"]
    lon_metadata = hdf.read_metadata(lon, "SD")

    # expand the lat and lon data arrays so that they have the same shape
    lat_data = utils.expand_1d_to_2d_array(hdf.read_data(lat, "SD"), lon_metadata.shape,
                                           axis=1)  # expand latitude column wise
    lon_data = utils.expand_1d_to_2d_array(hdf.read_data(lon, "SD"), lat_metadata.shape,
                                           axis=0)  # expand longitude row wise
    lat_metadata.shape = lat_data.shape
    lon_metadata.shape = lon_data.shape

    # make sure "Latitude" and "Longitude" (i.e. the standard_name) are displayed instead of "YDim" and "XDim"
    lat_metadata.standard_name = "latitude"
    lat_metadata._name = ""
    lon_metadata.standard_name = "longitude"
    lon_metadata._name = ""

    # create arrays for the time coordinate using the midpoint between the start date and the end date
    time_data_array = []
    for filename in filenames:
        mid_datetime = calculate_mid_time(self._get_start_date(filename), self._get_end_date(filename))
        logging.debug("Using " + str(mid_datetime) + " as datetime for file " + str(filename))
        # Only use part of the full lat shape as it has already been concatenated
        # NB: integer division is required here so np.empty receives an int under Python 3
        time_data = np.empty((lat_metadata.shape[0] // len(filenames), lat_metadata.shape[1]), dtype="float64")
        time_data.fill(mid_datetime)
        time_data_array.append(time_data)
    time_data = utils.concatenate(time_data_array)

    time_metadata = Metadata(name="DateTime", standard_name="time", shape=time_data.shape,
                             units=str(cis_standard_time_unit), calendar=cis_standard_time_unit.calendar)

    coords = CoordList()
    coords.append(Coord(lon_data, lon_metadata, "X"))
    coords.append(Coord(lat_data, lat_metadata, "Y"))
    coords.append(Coord(time_data, time_metadata, "T"))

    return coords

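# Illustration (not part of the product code): the two expand_1d_to_2d_array
# calls above turn the 1-D XDim/YDim axes into full 2-D lat/lon grids, which
# is equivalent to a NumPy meshgrid:

import numpy as np

lat_1d = np.array([0., 10., 20.])             # YDim values
lon_1d = np.array([100., 110.])               # XDim values
lon_2d, lat_2d = np.meshgrid(lon_1d, lat_1d)  # both arrays have shape (3, 2)
assert lat_2d.shape == lon_2d.shape == (3, 2)
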
def _create_one_dimensional_coord_list(self, filenames, index_offset=1):
    """
    Create a set of coordinates appropriate for a one-dimensional (column integrated) variable

    :param filenames:
    :param int index_offset: For 5km products this will choose the coordinates which represent the start (0),
        middle (1) and end (2) of the 15 shots making up each column retrieval.
    :return:
    """
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    import datetime as dt
    from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

    variables = ['Latitude', 'Longitude', "Profile_Time"]
    logging.info("Listing coordinates: " + str(variables))

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    # latitude
    lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset]
    lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")
    lat_coord = Coord(lat_data, lat_metadata, 'Y')

    # longitude
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset]
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    # profile time, x
    time = sdata['Profile_Time']
    time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset]
    time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0))
    time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time', shape=time_data.shape,
                                           units=cis_standard_time_unit), "T")

    # create the object containing all coordinates
    coords = CoordList()
    coords.append(lat_coord)
    coords.append(lon_coord)
    coords.append(time_coord)

    return coords

def create_data_object(self, filenames, variable):
    from pywork.CALIOP_utils import mask_data

    logging.debug("Creating *QC'd* data object for variable " + variable)

    # reading of variables
    sdata, vdata = hdf.read(filenames, [variable, "Pressure", "Extinction_QC_Flag_532", "CAD_Score"])

    # retrieve data + its metadata
    var = sdata[variable]
    metadata = hdf.read_metadata(var, "SD")

    if variable in MIXED_RESOLUTION_VARIABLES:
        logging.warning("Using Level 2 resolution profile for mixed resolution variable {}. See CALIPSO "
                        "documentation for more details".format(variable))
        callback = self._get_mixed_resolution_calipso_data
    else:
        callback = self._get_calipso_data

    var_data = hdf.read_data(sdata[variable], callback)

    extinction_qc = hdf.read_data(sdata["Extinction_QC_Flag_532"], self._get_mixed_resolution_calipso_data)
    cad_score = hdf.read_data(sdata["CAD_Score"], self._get_mixed_resolution_calipso_data)

    # mask_data (private pywork helper) is assumed to return a 1-tuple, hence the unpacking
    qcd_var_data, = mask_data(var_data, cad_score, extinction_qc)

    # reading coordinates: column-integrated variables are one-dimensional
    if variable.startswith('Column'):
        coords = self._create_one_dimensional_coord_list(filenames, index_offset=1)
    else:
        coords = self._create_coord_list(filenames, index_offset=1)

    pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
    pres_metadata = hdf.read_metadata(sdata['Pressure'], "SD")
    # Fix badly formatted units which aren't CF compliant and will break if they are aggregated
    if str(pres_metadata.units) == "hPA":
        pres_metadata.units = "hPa"

    # unpacked in the same way as qcd_var_data above, assuming the same return shape
    qcd_pres_data, = mask_data(pres_data, cad_score, extinction_qc)

    pres_coord = Coord(qcd_pres_data, pres_metadata, 'P')
    coords.append(pres_coord)

    return UngriddedData(qcd_var_data, metadata, coords)

def _create_coord_list(self, filenames, variable=None):
    import datetime as dt
    from cis.time_util import cis_standard_time_unit
    from cis.utils import concatenate
    from geotiepoints import modis5kmto1km

    variables = ['Latitude', 'Longitude', 'View_time']
    logging.info("Listing coordinates: " + str(variables))

    sdata, vdata = hdf.read(filenames, variables)

    apply_interpolation = False
    if variable is not None:
        scale = self.__get_data_scale(filenames[0], variable)
        # NB: compare strings with equality, not identity
        apply_interpolation = scale == "1km"

    lat_data = hdf.read_data(sdata['Latitude'], _get_MODIS_SDS_data)
    lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")

    lon_data = hdf.read_data(sdata['Longitude'], _get_MODIS_SDS_data)
    lon_metadata = hdf.read_metadata(sdata['Longitude'], "SD")

    if apply_interpolation:
        lon_data, lat_data = modis5kmto1km(lon_data, lat_data)

    lat_coord = Coord(lat_data, lat_metadata, 'Y')
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    time = sdata['View_time']
    time_metadata = hdf.read_metadata(time, "SD")
    # Ensure the standard name is set
    time_metadata.standard_name = 'time'
    time_metadata.units = cis_standard_time_unit

    t_arrays = []
    for f, d in zip(filenames, time):
        time_start = self._get_start_date(f)
        t_data = _get_MODIS_SDS_data(d) / 24.0  # Convert hours since to days since
        t_offset = time_start - dt.datetime(1600, 1, 1)  # Convert to CIS time
        t_arrays.append(t_data + t_offset.days)

    time_coord = Coord(concatenate(t_arrays), time_metadata, "T")

    return CoordList([lat_coord, lon_coord, time_coord])

def _create_one_dimensional_coord_list(self, filenames):
    from cis.time_util import cis_standard_time_unit

    # list of coordinate variables we are interested in
    variables = ['MODIS_latitude', 'MODIS_longitude', 'TAI_start', 'Profile_time']

    # reading the various files
    logging.info("Listing coordinates: " + str(variables))
    sdata, vdata = hdf.read(filenames, variables)

    # latitude
    lat = sdata['MODIS_latitude']
    lat_data = hdf.read_data(lat, self._get_cloudsat_sds_data)
    lat_metadata = hdf.read_metadata(lat, "SD")
    lat_metadata.shape = lat_data.shape
    lat_metadata.standard_name = 'latitude'
    lat_coord = Coord(lat_data, lat_metadata)

    # longitude
    lon = sdata['MODIS_longitude']
    lon_data = hdf.read_data(lon, self._get_cloudsat_sds_data)
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_metadata.shape = lon_data.shape
    lon_metadata.standard_name = 'longitude'
    lon_coord = Coord(lon_data, lon_metadata)

    # time coordinate
    time_data = self._generate_time_array(vdata)
    time_coord = Coord(time_data, Metadata(name='Profile_time', standard_name='time', shape=time_data.shape,
                                           units=cis_standard_time_unit), "X")

    # create object containing list of coordinates
    coords = CoordList()
    coords.append(lat_coord)
    coords.append(lon_coord)
    coords.append(time_coord)

    return coords

def create_data_object(self, filenames, variable):
    logging.debug("Creating data object for variable " + variable)

    # reading of variables
    sdata, vdata = hdf.read(filenames, variable)

    # reading (un-expanded) coordinates, since the data is 1-dimensional
    coords = self._create_one_dimensional_coord_list(filenames)

    # retrieve data + its metadata
    if variable in vdata:
        var = hdf.read_data(vdata[variable], self._get_cloudsat_vds_data)
        metadata = hdf.read_metadata(vdata[variable], "VD")
    elif variable in sdata:
        var = hdf.read_data(sdata[variable], self._get_cloudsat_sds_data)
        metadata = hdf.read_metadata(sdata[variable], "SD")
    else:
        raise ValueError("variable not found")

    return UngriddedData(var, metadata, coords)

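# Example usage (sketch): these product classes are normally driven through the
# CIS API rather than instantiated directly. The file pattern and variable name
# below are placeholders, not values taken from this module:

from cis import read_data

data = read_data("CS_*_GRANULE_*.hdf", "RVOD_liq_water_content", product="CloudSat")
print(data)
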
def _create_coord_list(self, filenames, variable=None):
    import datetime as dt

    variables = ['Latitude', 'Longitude', 'Scan_Start_Time']
    logging.info("Listing coordinates: " + str(variables))

    sdata, vdata = hdf.read(filenames, variables)

    apply_interpolation = False
    if variable is not None:
        scale = self.__get_data_scale(filenames[0], variable)
        # NB: compare strings with equality, not identity
        apply_interpolation = scale == "1km"

    lat = sdata['Latitude']
    sd_lat = hdf.read_data(lat, _get_MODIS_SDS_data)
    lat_data = self.__field_interpolate(sd_lat) if apply_interpolation else sd_lat
    lat_metadata = hdf.read_metadata(lat, "SD")
    lat_coord = Coord(lat_data, lat_metadata, 'Y')

    lon = sdata['Longitude']
    if apply_interpolation:
        lon_data = self.__field_interpolate(hdf.read_data(lon, _get_MODIS_SDS_data))
    else:
        lon_data = hdf.read_data(lon, _get_MODIS_SDS_data)
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    time = sdata['Scan_Start_Time']
    time_metadata = hdf.read_metadata(time, "SD")
    # Ensure the standard name is set
    time_metadata.standard_name = 'time'
    time_coord = Coord(time, time_metadata, "T", _get_MODIS_SDS_data)
    time_coord.convert_TAI_time_to_std_time(dt.datetime(1993, 1, 1, 0, 0, 0))

    return CoordList([lat_coord, lon_coord, time_coord])

def _create_coord_list(self, filenames, variable=None):
    import datetime as dt
    import numpy as np
    from geotiepoints import modis5kmto1km

    variables = ['Latitude', 'Longitude', 'Scan_Start_Time']
    logging.info("Listing coordinates: " + str(variables))

    sdata, vdata = hdf.read(filenames, variables)

    self.apply_interpolation = False
    if variable is not None:
        scale = self.__get_data_scale(filenames[0], variable)
        self.apply_interpolation = scale == "1km"

    lat = sdata['Latitude']
    lat_data = hdf.read_data(lat, _get_MODIS_SDS_data)
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, _get_MODIS_SDS_data)

    if self.apply_interpolation:
        lon_data, lat_data = modis5kmto1km(lon_data[:], lat_data[:])

    lat_metadata = hdf.read_metadata(lat, "SD")
    lat_coord = Coord(lat_data, lat_metadata, 'Y')
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    time = sdata['Scan_Start_Time']
    time_metadata = hdf.read_metadata(time, "SD")
    # Ensure the standard name is set
    time_metadata.standard_name = 'time'
    time_data = hdf.read_data(time, _get_MODIS_SDS_data)
    if self.apply_interpolation:
        time_data = np.repeat(np.repeat(time_data, 5, axis=0), 5, axis=1)
    time_coord = Coord(time_data, time_metadata, "T")
    time_coord.convert_TAI_time_to_std_time(dt.datetime(1993, 1, 1, 0, 0, 0))

    return CoordList([lat_coord, lon_coord, time_coord])

def _create_coord_list(self, filenames, variable=None):
    import datetime as dt
    import numpy as np
    from geotiepoints import modis5kmto1km

    variables = ['Latitude', 'Longitude', 'Scan_Start_Time']
    logging.info("Listing coordinates: " + str(variables))

    sdata, vdata = hdf.read(filenames, variables)

    apply_interpolation = False
    if variable is not None:
        scale = self.__get_data_scale(filenames[0], variable)
        # NB: compare strings with equality, not identity
        apply_interpolation = scale == "1km"

    lat = sdata['Latitude']
    lat_data = hdf.read_data(lat, _get_MODIS_SDS_data)
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, _get_MODIS_SDS_data)

    if apply_interpolation:
        lon_data, lat_data = modis5kmto1km(lon_data[:], lat_data[:])

    lat_metadata = hdf.read_metadata(lat, "SD")
    lat_coord = Coord(lat_data, lat_metadata, 'Y')
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    time = sdata['Scan_Start_Time']
    time_metadata = hdf.read_metadata(time, "SD")
    # Ensure the standard name is set
    time_metadata.standard_name = 'time'
    time_data = hdf.read_data(time, _get_MODIS_SDS_data)
    # upsample the 5 km time grid to the 1 km grid
    time_data = np.repeat(np.repeat(time_data, 5, axis=0), 5, axis=1)
    time_coord = Coord(time_data, time_metadata, "T")
    time_coord.convert_TAI_time_to_std_time(dt.datetime(1993, 1, 1, 0, 0, 0))

    return CoordList([lat_coord, lon_coord, time_coord])

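# Illustration (not part of the product code): the nested np.repeat above
# upsamples the 5 km Scan_Start_Time grid to the 1 km grid by copying each
# coarse pixel into a 5x5 block, which is adequate for a slowly varying field:

import numpy as np

coarse = np.arange(6.0).reshape(2, 3)                      # 5 km grid, shape (2, 3)
fine = np.repeat(np.repeat(coarse, 5, axis=0), 5, axis=1)  # 1 km grid, shape (10, 15)
assert fine.shape == (10, 15)
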
def create_data_object(self, filenames, variable):
    logging.debug("Creating data object for variable " + variable)

    # reading coordinates
    # the variable here is needed to work out whether to apply interpolation to the lat/lon data or not
    coords = self._create_coord_list(filenames, variable)

    # reading of variables
    sdata, vdata = hdf.read(filenames, variable)

    # retrieve data + its metadata
    var = sdata[variable]
    metadata = hdf.read_metadata(var, "SD")

    # cut off the edges of the data...
    # TODO CHECK THIS IS ACTUALLY VALID BEFORE PUBLISHING ANYTHING WITH THIS
    d = hdf.read_data(var, _get_MODIS_SDS_data)[:, 2:-2]

    return UngriddedData(d, metadata, coords, _get_MODIS_SDS_data)

def create_data_object(self, filenames, variable):
    logging.debug("Creating data object for variable " + variable)

    # reading coordinates
    # the variable here is needed to work out whether to apply interpolation to the lat/lon data or not
    coords = self._create_coord_list(filenames, variable)

    # reading of variables
    sdata, vdata = hdf.read(filenames, variable)

    # retrieve data + its metadata
    var = sdata[variable]
    metadata = hdf.read_metadata(var, "SD")

    # cut off the edges of the data...
    # TODO CHECK THIS IS ACTUALLY VALID BEFORE PUBLISHING ANYTHING WITH THIS
    d = hdf.read_data(var, _get_MODIS_SDS_data)
    if self.apply_interpolation:
        d = d[:, 2:-2]

    return UngriddedData(d, metadata, coords, _get_MODIS_SDS_data)

def _create_coord_list(self, filenames, index_offset=0):
    import logging
    from cis.data_io import hdf as hdf
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import Metadata
    import cis.utils as utils
    from cis.data_io.hdf_vd import VDS, get_data
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    import datetime as dt
    from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

    variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
    logging.info("Listing coordinates: " + str(variables))

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    alt_name = "altitude"
    logging.info("Additional coordinates: '" + alt_name + "'")

    # work out the size of the data arrays
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    #       If this is not the case, then the following line will need to be changed
    #       to concatenate the data from all the files and not just arbitrarily pick
    #       the altitudes from the first file.
    alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
    alt_data *= 1000.0  # Convert to m
    len_x = alt_data.shape[0]

    lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)
    len_y = lat_data.shape[0]

    new_shape = (len_x, len_y)

    # altitude
    alt_data = utils.expand_1d_to_2d_array(alt_data, len_y, axis=0)
    alt_metadata = Metadata(name=alt_name, standard_name=alt_name, shape=new_shape)
    alt_coord = Coord(alt_data, alt_metadata)

    # pressure
    if self.include_pressure:
        pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
        pres_metadata = hdf.read_metadata(sdata['Pressure'], "SD")
        # Fix badly formatted units which aren't CF compliant and will break if they are aggregated
        if str(pres_metadata.units) == "hPA":
            pres_metadata.units = "hPa"
        pres_metadata.shape = new_shape
        pres_coord = Coord(pres_data, pres_metadata, 'P')

    # latitude
    lat_data = utils.expand_1d_to_2d_array(lat_data[:, index_offset], len_x, axis=1)
    lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")
    lat_metadata.shape = new_shape
    lat_coord = Coord(lat_data, lat_metadata, 'Y')

    # longitude
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_calipso_data)
    lon_data = utils.expand_1d_to_2d_array(lon_data[:, index_offset], len_x, axis=1)
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_metadata.shape = new_shape
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    # profile time, x
    time = sdata['Profile_Time']
    time_data = hdf.read_data(time, self._get_calipso_data)
    time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0))
    time_data = utils.expand_1d_to_2d_array(time_data[:, index_offset], len_x, axis=1)
    time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time', shape=time_data.shape,
                                           units=cis_standard_time_unit), "T")

    # create the object containing all coordinates
    coords = CoordList()
    coords.append(lat_coord)
    coords.append(lon_coord)
    coords.append(time_coord)
    coords.append(alt_coord)
    if self.include_pressure and (pres_data.shape == alt_data.shape):
        # For MODIS L1 this may not be true, so we skip the air pressure reading. If required for MODIS L1
        # then some kind of interpolation of the air pressure would be required, as it is on a different
        # (smaller) grid than the Lidar_Data_Altitudes.
        coords.append(pres_coord)

    return coords

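# Optional sketch (not in the original reader): the NOTE above assumes every
# Caliop_L1 file shares the first file's altitude grid. A defensive check
# along these lines could validate that assumption before reusing the grid:

import numpy as np
from cis.data_io.hdf_vd import VDS, get_data

def check_common_altitudes(filenames):
    """Raise if the files do not all share one Lidar_Data_Altitudes grid."""
    ref = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
    for f in filenames[1:]:
        alt = get_data(VDS(f, "Lidar_Data_Altitudes"), True)
        if not np.allclose(ref, alt):
            raise ValueError("Altitude grids differ between Caliop files: " + f)
    return ref
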
def _create_coord_list(self, filenames):
    from cis.time_util import cis_standard_time_unit

    # list of coordinate variables we are interested in
    variables = ['Latitude', 'Longitude', 'TAI_start', 'Profile_time', 'Height']

    # reading the various files
    try:
        logging.info("Listing coordinates: " + str(variables))
        sdata, vdata = hdf.read(filenames, variables)

        # altitude coordinate
        height = sdata['Height']
        height_data = hdf.read_data(height, self._get_cloudsat_sds_data)
        height_metadata = hdf.read_metadata(height, "SD")
        height_coord = Coord(height_data, height_metadata, "Y")

    except InvalidVariableError:
        # This means we are reading a Cloudsat file without height, so remove height from the variables list
        variables.remove('Height')
        logging.info("Listing coordinates: " + str(variables))
        sdata, vdata = hdf.read(filenames, variables)
        height_data = None
        height_coord = None

    # latitude
    lat = vdata['Latitude']
    lat_data = hdf.read_data(lat, self._get_cloudsat_vds_data)
    if height_data is not None:
        lat_data = utils.expand_1d_to_2d_array(lat_data, len(height_data[0]), axis=1)
    lat_metadata = hdf.read_metadata(lat, "VD")
    lat_metadata.shape = lat_data.shape
    lat_coord = Coord(lat_data, lat_metadata)

    # longitude
    lon = vdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_cloudsat_vds_data)
    if height_data is not None:
        lon_data = utils.expand_1d_to_2d_array(lon_data, len(height_data[0]), axis=1)
    lon_metadata = hdf.read_metadata(lon, "VD")
    lon_metadata.shape = lon_data.shape
    lon_coord = Coord(lon_data, lon_metadata)

    # time coordinate
    time_data = self._generate_time_array(vdata)
    if height_data is not None:
        time_data = utils.expand_1d_to_2d_array(time_data, len(height_data[0]), axis=1)
    time_coord = Coord(time_data, Metadata(name='Profile_time', standard_name='time', shape=time_data.shape,
                                           units=cis_standard_time_unit), "X")

    # create object containing list of coordinates
    coords = CoordList()
    coords.append(lat_coord)
    coords.append(lon_coord)
    if height_coord is not None:
        coords.append(height_coord)
    coords.append(time_coord)

    return coords

def create_data_object(self, filenames, variable, index_offset=1):
    from cis.data_io.hdf_vd import VDS, get_data
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit

    logging.debug("Creating data object for variable " + variable)

    variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
    logging.info("Listing coordinates: " + str(variables))

    variables.append(variable)

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    alt_name = "altitude"
    logging.info("Additional coordinates: '" + alt_name + "'")

    # work out the size of the data arrays
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    #       If this is not the case, then the following line will need to be changed
    #       to concatenate the data from all the files and not just arbitrarily pick
    #       the altitudes from the first file.
    alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
    alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
    alt_coord.convert_units('m')

    lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset]
    lat_coord = AuxCoord(lat_data, standard_name='latitude')

    pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
    pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

    # longitude
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset]
    lon_coord = AuxCoord(lon_data, standard_name='longitude')

    # profile time, x
    time = sdata['Profile_Time']
    time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset]
    time_coord = DimCoord(time_data, long_name='Profile_Time', standard_name='time',
                          units="seconds since 1993-01-01 00:00:00")
    time_coord.convert_units(cis_standard_time_unit)

    # retrieve data + its metadata
    var = sdata[variable]
    metadata = hdf.read_metadata(var, "SD")

    if variable in MIXED_RESOLUTION_VARIABLES:
        logging.warning("Using Level 2 resolution profile for mixed resolution variable {}. See CALIPSO "
                        "documentation for more details".format(variable))
        data = hdf.read_data(var, self._get_mixed_resolution_calipso_data)
    else:
        data = hdf.read_data(var, self._get_calipso_data)

    cube = Cube(data, long_name=metadata.long_name, units=self.clean_units(metadata.units),
                dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)],
                aux_coords_and_dims=[(lat_coord, (0,)), (lon_coord, (0,)), (pres_coord, (0, 1))])
    gd = GriddedData.make_from_cube(cube)
    return gd