def create_coords(self, filenames, usr_variable=None):
    """
    Read the longitude and latitude coordinates, and optionally a data variable, from the given files.

    :param filenames: List of files to read
    :param usr_variable: Name of a data variable to load with the coordinates, or None for coordinates only
    :return: UngriddedCoordinates when usr_variable is None, otherwise an UngriddedData object built from
        the variable and the coordinates
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.exceptions import InvalidVariableError

    variables = [("longitude", "x"), ("latitude", "y")]

    logging.info("Listing coordinates: " + str(variables))

    coords = CoordList()
    var_data = read_many_files_individually(filenames, [name for name, _ in variables])
    # Look each variable up by its name instead of pairing var_data.values() with the list
    # positionally, so the axis given to each coordinate cannot depend on dict ordering.
    for name, axis in variables:
        var = var_data[name]
        try:
            coords.append(Coord(var, get_metadata(var[0]), axis=axis))
        except InvalidVariableError:
            pass

    # Note - We don't need to convert this time coord as it should have been written in our
    #        'standard' time unit

    if usr_variable is None:
        res = UngriddedCoordinates(coords)
    else:
        usr_var_data = read_many_files_individually(filenames, usr_variable)[usr_variable]
        res = UngriddedData(usr_var_data, get_metadata(usr_var_data[0]), coords)

    return res
def _create_coord_list(self, filename):
    """
    Build the time, altitude, latitude and longitude coordinates for a single file.

    The height variable is read from 'altitude', falling back to 'range' if 'altitude' is not a valid
    variable. Time is expanded to 2D along axis 1 using the second dimension of the height variable;
    latitude and longitude are multiplied onto arrays of ones shaped like the height slice.

    :param filename: The file to read the coordinate variables from
    :return: CoordList holding the time, altitude, latitude and longitude coordinates, in that order
    """
    import numpy as np

    def load(name):
        # Read a single variable from the file and pull it out of the returned mapping
        return read(filename, name)[name]

    coords = CoordList()

    time_var = load('time')
    try:
        height_var = load('altitude')
    except InvalidVariableError:
        height_var = load('range')

    n_levels = height_var.shape[1]
    expanded_time = utils.expand_1d_to_2d_array(time_var[:], n_levels, axis=1)
    time_coord = Coord(expanded_time, get_metadata(time_var), axis='x')
    time_coord.convert_to_std_time()
    coords.append(time_coord)

    # Take index 0 of the third dimension to eliminate the "angle" axis
    height_arr = height_var[:, :, 0]
    coords.append(Coord(height_arr, get_metadata(height_var), axis='y'))

    lat_var = load('latitude')
    coords.append(Coord(np.ones(height_arr.shape) * lat_var[:], get_metadata(lat_var)))

    lon_var = load('longitude')
    coords.append(Coord(np.ones(height_arr.shape) * lon_var[:], get_metadata(lon_var)))

    return coords
def __init__(self, data, metadata, coords, data_retrieval_callback=None):
    """
    Constructor

    :param data: The data handler (e.g. SDS instance) for the specific data type, or a numpy array of data.
        This can be a list of data handlers, or a single data handler
    :param metadata: Any associated metadata
    :param coords: The associated coordinates: a list of Coord objects, a CoordList, or a single Coord
    :param data_retrieval_callback: A method for retrieving data when needed
    :raises ValueError: If coords is none of the accepted types
    """
    from cis.data_io.Coord import CoordList, Coord

    # The order of these checks is significant: a plain list is tested for first.
    if isinstance(coords, list):
        coord_list = CoordList(coords)
    elif isinstance(coords, CoordList):
        coord_list = coords
    elif isinstance(coords, Coord):
        coord_list = CoordList([coords])
    else:
        raise ValueError("Invalid Coords type")
    self._coords = coord_list

    # TODO Find a cleaner workaround for this, for some reason UDUNITS can not parse 'per kilometer per steradian'
    unparsable_units = 'per kilometer per steradian'
    if str(metadata.units) == unparsable_units:
        metadata.units = 'kilometer^-1 steradian^-1'

    super(UngriddedData, self).__init__(data, metadata, data_retrieval_callback)
def create_coords(self, filenames, usr_variable=None):
    """
    Read the 'lon' and 'lat' coordinates, and optionally a data variable, from the given files.

    :param filenames: List of files to read
    :param usr_variable: Name of a data variable to load with the coordinates, or None for coordinates only
    :return: UngriddedCoordinates when usr_variable is None, otherwise an UngriddedData object built from
        the variable and the coordinates
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.exceptions import InvalidVariableError

    variables = [("lon", "x"), ("lat", "y")]

    logging.info("Listing coordinates: " + str(variables))

    coords = CoordList()
    var_data = read_many_files_individually(filenames, [name for name, _ in variables])
    # Look each variable up by its name instead of pairing var_data.values() with the list
    # positionally, so the axis given to each coordinate cannot depend on dict ordering.
    for name, axis in variables:
        var = var_data[name]
        try:
            coords.append(Coord(var, get_metadata(var[0]), axis=axis))
        except InvalidVariableError:
            pass

    # Note - We don't need to convert this time coord as it should have been written in our
    #        'standard' time unit

    if usr_variable is None:
        res = UngriddedCoordinates(coords)
    else:
        usr_var_data = read_many_files_individually(filenames, usr_variable)[usr_variable]
        res = UngriddedData(usr_var_data, get_metadata(usr_var_data[0]), coords)

    return res
def create_coords(self, filenames, usr_variable=None):
    """
    Read whichever of the longitude, latitude, altitude, time and air_pressure variables can be read
    from the files, and optionally a data variable.

    Variables that raise InvalidVariableError while being read are skipped.

    :param filenames: List of files to read
    :param usr_variable: Name of a data variable to load with the coordinates, or None for coordinates only
    :return: UngriddedCoordinates when usr_variable is None, otherwise an UngriddedData object
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata
    from cis.data_io.Coord import Coord
    from cis.exceptions import InvalidVariableError

    variables = [("longitude", "x"), ("latitude", "y"), ("altitude", "z"), ("time", "t"), ("air_pressure", "p")]

    logging.info("Listing coordinates: " + str(variables))

    coords = CoordList()
    for name, axis in variables:
        try:
            values = read_many_files_individually(filenames, name)[name]
            coords.append(Coord(values, get_metadata(values[0]), axis=axis))
        except InvalidVariableError:
            pass

    # Note - We don't need to convert this time coord as it should have been written in our
    #        'standard' time unit

    if usr_variable is None:
        return UngriddedCoordinates(coords)

    usr_values = read_many_files_individually(filenames, usr_variable)[usr_variable]
    return UngriddedData(usr_values, get_metadata(usr_values[0]), coords)
def create_coords(self, filenames, variable=None):
    """
    Reads the coordinates, and the data if required, from the files

    Variables that raise InvalidVariableError while being read are skipped. When a variable is
    requested, the 'DP_MID' auxiliary coordinate is added from the first file before the data is read.

    :param filenames: List of filenames to read coordinates from
    :param variable: Name of a variable to load as the data, or None for coordinates only
    :return: UngriddedCoordinates when variable is None, otherwise an UngriddedData object
    """
    from cis.data_io.netcdf import read_many_files_individually
    from cis.data_io.Coord import Coord, CoordList
    from cis.exceptions import InvalidVariableError

    variables = [("longitude", "x"), ("latitude", "y"), ("altitude", "z"), ("time", "t"), ("air_pressure", "p")]

    dim_coords = CoordList()
    for name, axis in variables:
        try:
            values = read_many_files_individually(filenames, name)[name]
            dim_coords.append(Coord(values, get_metadata(values[0]), axis=axis))
        except InvalidVariableError:
            pass

    if variable is None:
        return UngriddedCoordinates(dim_coords)

    n_times = dim_coords.get_coord(standard_name='time').data.size
    all_coords = self._add_aux_coordinate(dim_coords, filenames[0], 'DP_MID', n_times)

    usr_values = read_many_files_individually(filenames, variable)[variable]
    return UngriddedData(usr_values, get_metadata(usr_values[0]), all_coords)
def create_coords(self, filenames, usr_variable=None):
    """
    Discover the coordinate variables in the first file by name, read them from all of the files, and
    optionally read a data variable as well.

    Only the first variable found for each standard name is used. Coordinates whose standard name is
    'time' are converted to the CIS standard time.

    :param filenames: List of files to read; the set of coordinates is assumed to be the same in all of them
    :param usr_variable: Name of a data variable to load with the coordinates, or None for coordinates only
    :return: UngriddedCoordinates when usr_variable is None, otherwise an UngriddedData object
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata, get_netcdf_file_variables
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.exceptions import InvalidVariableError

    # We have to read it once first to find out which variables are in there. We assume the set of coordinates in
    # all the files are the same
    file_variables = get_netcdf_file_variables(filenames[0])

    def get_axis_std_name(var):
        """Return an (axis, standard_name) pair for a coordinate variable name, or None for other names."""
        lvar = var.lower()
        if lvar.startswith('lon'):
            return 'x', 'longitude'
        if lvar.startswith('lat'):
            return 'y', 'latitude'
        # lvar has been lower-cased, so the names tested here must be lower case as well
        # (comparing against 'G_ALT' could never match).
        if lvar in ('g_alt', 'altitude', 'pressure_altitude'):
            return 'z', 'altitude'
        if lvar == 'time':
            return 't', 'time'
        if lvar in ('p', 'pressure', 'static_pressure'):
            return 'p', 'air_pressure'
        return None

    # Keep only the first variable found for each standard name
    coord_variables = []
    seen_std_names = set()
    for var_name in file_variables:
        axis_std_name = get_axis_std_name(var_name)
        if axis_std_name is None or axis_std_name[1] in seen_std_names:
            continue
        seen_std_names.add(axis_std_name[1])
        coord_variables.append((var_name, axis_std_name))

    all_variables = list(coord_variables)
    if usr_variable is not None:
        all_variables.append((usr_variable, ''))

    logging.info("Listing coordinates: " + str(all_variables))

    coords = CoordList()
    var_data = read_many_files_individually(filenames, [v[0] for v in all_variables])
    for name, (axis, std_name) in coord_variables:
        try:
            meta = get_metadata(var_data[name][0])
            if meta.standard_name is None:
                meta.standard_name = std_name
            coord = Coord(var_data[name], meta, axis=axis)
            if meta.standard_name == 'time':
                # Converting units to CIS std time
                coord.convert_to_std_time()
            coords.append(coord)
        except InvalidVariableError:
            pass

    if usr_variable is None:
        res = UngriddedCoordinates(coords)
    else:
        res = UngriddedData(var_data[usr_variable], get_metadata(var_data[usr_variable][0]), coords)

    return res
def create_coords(self, filenames, usr_variable=None):
    """
    Discover the coordinate variables in the first file by name, read them from all of the files, and
    optionally read a data variable as well.

    Only the first variable found for each standard name is used.

    :param filenames: List of files to read; the set of coordinates is assumed to be the same in all of them
    :param usr_variable: Name of a data variable to load with the coordinates, or None for coordinates only
    :return: UngriddedCoordinates when usr_variable is None, otherwise an UngriddedData object
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata, get_netcdf_file_variables
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.exceptions import InvalidVariableError

    # We have to read it once first to find out which variables are in there. We assume the set of coordinates in
    # all the files are the same
    file_variables = get_netcdf_file_variables(filenames[0])

    def get_axis_std_name(var):
        """Return an (axis, standard_name) pair for a coordinate variable name, or None for other names."""
        lvar = var.lower()
        if lvar == 'longitude':
            return 'x', 'longitude'
        if lvar == 'latitude':
            return 'y', 'latitude'
        # lvar has been lower-cased, so the names tested here must be lower case as well
        # (comparing against 'G_ALT' could never match).
        if lvar in ('g_alt', 'altitude', 'pressure_altitude'):
            return 'z', 'altitude'
        if lvar == 'time':
            return 't', 'time'
        if lvar in ('p', 'pressure', 'static_pressure'):
            return 'p', 'air_pressure'
        return None

    # Keep only the first variable found for each standard name
    coord_variables = []
    seen_std_names = set()
    for var_name in file_variables:
        axis_std_name = get_axis_std_name(var_name)
        if axis_std_name is None or axis_std_name[1] in seen_std_names:
            continue
        seen_std_names.add(axis_std_name[1])
        coord_variables.append((var_name, axis_std_name))

    all_variables = list(coord_variables)
    if usr_variable is not None:
        all_variables.append((usr_variable, ''))

    logging.info("Listing coordinates: " + str(all_variables))

    coords = CoordList()
    var_data = read_many_files_individually(filenames, [v[0] for v in all_variables])
    for name, (axis, std_name) in coord_variables:
        try:
            meta = get_metadata(var_data[name][0])
            if meta.standard_name is None:
                meta.standard_name = std_name
            coords.append(Coord(var_data[name], meta, axis=axis))
        except InvalidVariableError:
            pass

    # Note - We don't need to convert this time coord as it should have been written in our
    #        'standard' time unit

    if usr_variable is None:
        res = UngriddedCoordinates(coords)
    else:
        res = UngriddedData(var_data[usr_variable], get_metadata(var_data[usr_variable][0]), coords)

    return res
def create_coords(self, filenames, usr_variable=None):
    """
    Discover the coordinate variables in the first file by their exact names, read them from all of
    the files, and optionally read a data variable as well.

    Only the first variable found for each standard name is used.

    :param filenames: List of files to read; the set of coordinates is assumed to be the same in all of them
    :param usr_variable: Name of a data variable to load with the coordinates, or None for coordinates only
    :return: UngriddedCoordinates when usr_variable is None, otherwise an UngriddedData object
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata, get_netcdf_file_variables
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.exceptions import InvalidVariableError

    # We have to read it once first to find out which variables are in there. We assume the set of coordinates in
    # all the files are the same
    file_variables = get_netcdf_file_variables(filenames[0])

    # Exact (case-sensitive) variable name -> (axis, standard name)
    known_coords = {
        'LON_JAVAD': ('x', 'longitude'),
        'LON_OXTS': ('x', 'longitude'),
        'LAT_JAVAD': ('y', 'latitude'),
        'LAT_OXTS': ('y', 'latitude'),
        'ALT_JAVAD': ('z', 'altitude'),
        'ALT_OXTS': ('z', 'altitude'),
        'Time': ('t', 'time'),
        'PS_AIR': ('p', 'air_pressure'),
    }

    # Keep only the first variable found for each standard name
    coord_variables = []
    used_std_names = []
    for var_name in file_variables:
        axis_std_name = known_coords.get(var_name)
        if axis_std_name is None:
            continue
        if axis_std_name[1] not in used_std_names:
            used_std_names.append(axis_std_name[1])
            coord_variables.append((var_name, axis_std_name))

    all_variables = list(coord_variables)
    if usr_variable is not None:
        all_variables.append((usr_variable, ''))

    logging.info("Listing coordinates: " + str(all_variables))

    coords = CoordList()
    var_data = read_many_files_individually(filenames, [entry[0] for entry in all_variables])
    for name, (axis, std_name) in coord_variables:
        try:
            meta = get_metadata(var_data[name][0])
            if meta.standard_name is None:
                meta.standard_name = std_name
            coords.append(Coord(var_data[name], meta, axis=axis))
        except InvalidVariableError:
            pass

    # Note - We don't need to convert this time coord as it should have been written in our
    #        'standard' time unit

    if usr_variable is None:
        return UngriddedCoordinates(coords)
    return UngriddedData(var_data[usr_variable], get_metadata(var_data[usr_variable][0]), coords)
def _create_coord_list(self, filenames):
    """
    Build the longitude, latitude and time coordinates from the 'XDim' and 'YDim' variables.

    Latitude and longitude are expanded to 2D arrays of matching shape. Each file contributes a block
    of the time coordinate filled with the mid-point between that file's start and end dates.

    :param filenames: List of files to read
    :return: CoordList holding the longitude, latitude and time coordinates, in that order
    """
    import numpy as np
    from cis.time_util import calculate_mid_time, cis_standard_time_unit

    variables = ["XDim", "YDim"]
    logging.info("Listing coordinates: " + str(variables))

    sdata, vdata = hdf.read(filenames, variables)

    lat = sdata["YDim"]
    lat_metadata = hdf.read_metadata(lat, "SD")

    lon = sdata["XDim"]
    lon_metadata = hdf.read_metadata(lon, "SD")

    # expand lat and lon data array so that they have the same shape
    lat_data = utils.expand_1d_to_2d_array(
        hdf.read_data(lat, "SD"), lon_metadata.shape, axis=1
    )  # expand latitude column wise
    lon_data = utils.expand_1d_to_2d_array(
        hdf.read_data(lon, "SD"), lat_metadata.shape, axis=0
    )  # expand longitude row wise

    lat_metadata.shape = lat_data.shape
    lon_metadata.shape = lon_data.shape

    # to make sure "Latitude" and "Longitude", i.e. the standard_name is displayed instead of "YDim"and "XDim"
    lat_metadata.standard_name = "latitude"
    lat_metadata._name = ""
    lon_metadata.standard_name = "longitude"
    lon_metadata._name = ""

    # create arrays for time coordinate using the midpoint of the time delta between the start date and the end date
    time_data_array = []
    for filename in filenames:
        mid_datetime = calculate_mid_time(self._get_start_date(filename), self._get_end_date(filename))
        logging.debug("Using " + str(mid_datetime) + " as datetime for file " + str(filename))
        # Only use part of the full lat shape as it has already been concatenated.
        # Floor division keeps the dimension an integer; true division yields a float on Python 3,
        # which numpy does not accept as an array dimension.
        rows_per_file = lat_metadata.shape[0] // len(filenames)
        time_data = np.empty((rows_per_file, lat_metadata.shape[1]), dtype="float64")
        time_data.fill(mid_datetime)
        time_data_array.append(time_data)
    time_data = utils.concatenate(time_data_array)
    time_metadata = Metadata(
        name="DateTime",
        standard_name="time",
        shape=time_data.shape,
        units=str(cis_standard_time_unit),
        calendar=cis_standard_time_unit.calendar,
    )

    coords = CoordList()
    coords.append(Coord(lon_data, lon_metadata, "X"))
    coords.append(Coord(lat_data, lat_metadata, "Y"))
    coords.append(Coord(time_data, time_metadata, "T"))

    return coords
def _create_coordinates_list(self, data_variables, variable_selector):
    """
    Create a co-ordinate list for the data

    Time, latitude, longitude and altitude coordinates are always added, in that order; a pressure
    coordinate is appended only when the selector names a pressure variable.

    :param data_variables: the load data
    :param variable_selector: the variable selector for the data
    :return: a list of coordinates
    """
    coords = CoordList()

    # Time
    time_coord = self._create_time_coord(
        variable_selector.time_stamp_info, variable_selector.time_variable_name, data_variables)
    coords.append(time_coord)

    # Lat and Lon
    # Multiple points counts for multiple files
    # (np.prod is used because the np.product alias was removed in NumPy 2.0)
    points_count = [
        np.prod(var.shape) for var in data_variables[variable_selector.time_variable_name]
    ]
    if variable_selector.station:
        lat_coord = self._create_fixed_value_coord(
            "Y", variable_selector.station_latitude, "degrees_north", points_count, "latitude")
        lon_coord = self._create_fixed_value_coord(
            "X", variable_selector.station_longitude, "degrees_east", points_count, "longitude")
    else:
        lat_coord = self._create_coord(
            "Y", variable_selector.latitude_variable_name, data_variables, "latitude")
        lon_coord = self._create_coord(
            "X", variable_selector.longitude_variable_name, data_variables, "longitude")
    coords.append(lat_coord)
    coords.append(lon_coord)

    # Altitude
    if variable_selector.altitude is None:
        altitude_coord = self._create_coord(
            "Z", variable_selector.altitude_variable_name, data_variables, "altitude")
    else:
        altitude_coord = self._create_fixed_value_coord(
            "Z", variable_selector.altitude, "meters", points_count, "altitude")
    coords.append(altitude_coord)

    # Pressure
    if variable_selector.pressure_variable_name is not None:
        coords.append(
            self._create_coord("P", variable_selector.pressure_variable_name, data_variables, "air_pressure"))
    return coords
def _create_coord_list(self, filenames, data=None):
    """
    Build the latitude, longitude, altitude, pressure and time coordinates from trajectory data.

    :param filenames: List of files to load when no data is supplied
    :param data: Already loaded data providing the 'LAT', 'LON', 'ALT', 'PRESSURE' and 'DATETIMES'
        entries; when None the files are loaded and the first entry of the result is used
    :return: CoordList holding the latitude, longitude, altitude, pressure and time coordinates, in that order
    """
    from cis.data_io.ungridded_data import Metadata
    from cis.time_util import cis_standard_time_unit as ct

    if data is None:
        loaded = load_multiple_hysplit(filenames)
        # TODO only using first trajectory
        first_key = list(loaded.keys())[0]
        data = loaded[first_key]

    # Every coordinate shares the same one-dimensional shape
    n_points = (len(data),)

    lat_meta = Metadata(name="Latitude", shape=n_points, units="degrees_north",
                        range=(-90, 90), standard_name='latitude')
    lon_meta = Metadata(name="Longitude", shape=n_points, units="degrees_east",
                        range=(-180, 180), standard_name='longitude')
    alt_meta = Metadata(name="Altitude", shape=n_points, units="meters", standard_name='altitude')
    pres_meta = Metadata(name="Pressure", shape=n_points, units="hPa", standard_name='air_pressure')
    time_meta = Metadata(name="DateTime", standard_name="time", shape=n_points, units=str(ct))

    coords = CoordList()
    for key, meta in (('LAT', lat_meta), ('LON', lon_meta), ('ALT', alt_meta), ('PRESSURE', pres_meta)):
        coords.append(Coord(data[key], meta))
    coords.append(Coord(data['DATETIMES'], time_meta, "X"))  # TODO Why X axis?

    return coords
def _create_coord_list(self, filename):
    """
    Build the time, altitude, latitude and longitude coordinates for a single file.

    The height variable is read from 'altitude', falling back to 'range' if 'altitude' is not a valid
    variable. Time is expanded along axis 1 and height along axis 0 to form 2D arrays; latitude and
    longitude are multiplied onto arrays of ones shaped like the expanded height array.

    :param filename: The file to read the coordinate variables from
    :return: CoordList holding the time, altitude, latitude and longitude coordinates, in that order
    """
    import numpy as np

    def load(name):
        # Read a single variable from the file and pull it out of the returned mapping
        return read(filename, name)[name]

    coords = CoordList()

    time_var = load('time')
    n_times = time_var.shape[0]

    try:
        height_var = load('altitude')
    except InvalidVariableError:
        height_var = load('range')
    n_levels = height_var.shape[0]

    expanded_time = utils.expand_1d_to_2d_array(time_var[:], n_levels, axis=1)
    time_coord = Coord(expanded_time, get_metadata(time_var), axis='x')
    time_coord.convert_to_std_time()
    coords.append(time_coord)

    height_arr = utils.expand_1d_to_2d_array(height_var[:], n_times, axis=0)
    coords.append(Coord(height_arr, get_metadata(height_var), axis='y'))

    lat_var = load('latitude')
    coords.append(Coord(np.ones(height_arr.shape) * lat_var[:], get_metadata(lat_var)))

    lon_var = load('longitude')
    coords.append(Coord(np.ones(height_arr.shape) * lon_var[:], get_metadata(lon_var)))

    return coords
def create_coords(self, filenames, usr_variable=None):
    """
    Read the standard-named coordinate variables present in the first file from all of the files, and
    optionally read a data variable as well.

    :param filenames: List of files to read; the set of coordinates is assumed to be the same in all of them
    :param usr_variable: Name of a data variable to load with the coordinates, or None for coordinates only
    :return: UngriddedCoordinates when usr_variable is None, otherwise an UngriddedData object
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata, get_netcdf_file_variables
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.exceptions import InvalidVariableError

    # We have to read it once first to find out which variables are in there. We assume the set of coordinates in
    # all the files are the same
    file_variables = get_netcdf_file_variables(filenames[0])

    axis_lookup = {"longitude": "x", 'latitude': 'y', 'altitude': 'z', 'time': 't', 'air_pressure': 'p'}

    coord_variables = []
    for var_name in file_variables:
        if var_name in axis_lookup:
            coord_variables.append((var_name, axis_lookup[var_name]))

    # Create a copy to contain all the variables to read
    all_variables = coord_variables[:]
    if usr_variable is not None:
        all_variables.append((usr_variable, ''))

    logging.info("Listing coordinates: " + str(all_variables))

    names_to_read = [entry[0] for entry in all_variables]
    var_data = read_many_files_individually(filenames, names_to_read)

    coords = CoordList()
    for name, axis in coord_variables:
        try:
            values = var_data[name]
            coords.append(Coord(values, get_metadata(values[0]), axis=axis))
        except InvalidVariableError:
            pass

    # Note - We don't need to convert this time coord as it should have been written in our
    #        'standard' time unit

    if usr_variable is None:
        return UngriddedCoordinates(coords)

    usr_values = var_data[usr_variable]
    return UngriddedData(usr_values, get_metadata(usr_values[0]), coords)
def _create_coord_list(self, filenames):
    """
    Build the longitude, latitude and time coordinates from the given files.

    The variables are read as 'lon'/'lat'/'time' first; if that raises InvalidVariableError they are
    read as 'longitude'/'latitude'/'time' instead. The time coordinate is passed through
    self._fix_time before being added.

    :param filenames: List of files to read
    :return: CoordList holding the longitude, latitude and time coordinates, in that order
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata
    from cis.data_io.Coord import Coord
    from cis.exceptions import InvalidVariableError

    try:
        variables = ["lon", "lat", "time"]
        data = read_many_files_individually(filenames, variables)
    except InvalidVariableError:
        variables = ["longitude", "latitude", "time"]
        data = read_many_files_individually(filenames, variables)

    logging.info("Listing coordinates: " + str(variables))

    lon_name, lat_name, time_name = variables

    coords = CoordList()
    coords.append(Coord(data[lon_name], get_metadata(data[lon_name][0]), "X"))
    coords.append(Coord(data[lat_name], get_metadata(data[lat_name][0]), "Y"))
    time_coord = Coord(data[time_name], get_metadata(data[time_name][0]), "T")
    coords.append(self._fix_time(time_coord))

    return coords
def _create_coord_list(self):
    """
    Build the latitude, longitude and time coordinates from every file in self.filenames.

    Each file is opened as an ATSR product; the per-file arrays are concatenated before the
    coordinates are created.

    :return: CoordList holding the latitude, longitude and time coordinates, in that order
    """
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import Metadata
    from cis.time_util import cis_standard_time_unit as cstu
    # These implement a lot of what is necessary, but aren't in CIS style
    from acp_utils import rolling_window
    from orbit import ATSR

    lat_data = []
    lon_data = []
    time_data = []
    for fname in self.filenames:
        prod = ATSR(fname)

        lat_data.append(prod.lat)
        lon_data.append(prod.lon)
        time_data.append(prod.get_time())

    # TODO: Properly define metadata
    lat_meta = Metadata(standard_name="latitude", units="degrees")
    lon_meta = Metadata(standard_name="longitude", units="degrees")
    time_meta = Metadata(standard_name="time", units=cstu)

    lat = Coord(concatenate(lat_data), lat_meta, "Y")
    lat.update_shape()
    lat.update_range()
    # Longitude belongs on the X axis; it was previously given "Y", the same axis as latitude
    lon = Coord(concatenate(lon_data), lon_meta, "X")
    lon.update_shape()
    lon.update_range()
    time = Coord(concatenate(time_data), time_meta, "T")
    time.update_shape()
    time.update_range()

    return CoordList([lat, lon, time])
def _create_coord_list(self):
    """
    Read file coordinates into a CIS object

    Longitude and latitude are read as 'longitude'/'latitude', falling back to 'lon'/'lat' if that
    raises IndexError. The time coordinate is converted from TAI time using ATSR_REFERENCE_TIME.

    :return: CoordList holding the latitude, longitude and time coordinates, in that order
    """
    from cis.data_io.Coord import Coord, CoordList
    from reame.utils import ncdf_read

    try:
        lon_data, lon_metadata = ncdf_read(self.filenames, "longitude")
        lat_data, lat_metadata = ncdf_read(self.filenames, "latitude")
    except IndexError:
        lon_data, lon_metadata = ncdf_read(self.filenames, "lon")
        lat_data, lat_metadata = ncdf_read(self.filenames, "lat")

    lat = Coord(lat_data, lat_metadata, "Y")
    lat.update_shape()
    lat.update_range()
    lon = Coord(lon_data, lon_metadata, "X")
    lon.update_shape()
    # Refresh the longitude range; this previously called lat.update_range() a second time
    lon.update_range()

    time_data, time_metadata = ncdf_read(self.filenames, "time")
    # Ensure the standard name is set
    time_metadata.standard_name = "time"
    time = Coord(time_data, time_metadata, "T")
    time.convert_TAI_time_to_std_time(ATSR_REFERENCE_TIME)
    time.update_shape()
    time.update_range()

    return CoordList([lat, lon, time])
def setUp(self):
    """Create an UngriddedDataList of two variables that share the same pair of 5x3 coordinates."""
    x_points = np.arange(-10, 11, 5)
    y_points = np.arange(-5, 6, 5)
    y_grid, x_grid = np.meshgrid(y_points, x_points)

    lat_coord = Coord(x_grid, Metadata(name='lat', standard_name='latitude', units='degrees'))
    lon_coord = Coord(y_grid, Metadata(name='lon', standard_name='longitude', units='degrees'))
    self.coords = CoordList([lat_coord, lon_coord])

    # Values 1.0 .. 15.0 laid out on the 5x3 grid
    values = np.reshape(np.arange(15) + 1.0, (5, 3))

    rain_meta = Metadata(standard_name='rainfall_flux', long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                         units="kg m-2 s-1", missing_value=-999)
    rain = UngriddedData(values, rain_meta, self.coords)

    snow_meta = Metadata(standard_name='snowfall_flux', long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                         units="kg m-2 s-1", missing_value=-999)
    snow = UngriddedData(values * 0.1, snow_meta, self.coords)

    self.ungridded_data_list = UngriddedDataList([rain, snow])
def __init__(self, data, metadata, coords):
    """
    Constructor

    Points at which any coordinate, or any coordinate bound, is masked or NaN are removed from every
    variable and every coordinate before the parent constructor is called.

    :param data: A data array, or a list of them
    :param metadata: The metadata for the data, or a list of them
    :param coords: The associated coordinates: a list of Coord objects, a CoordList, or a single Coord
    :raises ValueError: If coords is none of the accepted types
    """
    from cis.data_io.Coord import CoordList
    from cis.utils import listify

    def getmask(arr):
        # Mask of the array's masked elements, extended with its NaNs where isnan can be evaluated
        mask = np.ma.getmaskarray(arr)
        try:
            mask |= np.isnan(arr)
        except ValueError:
            pass
        return mask

    def bounds_by_edge(coord):
        # Iterate over the bounds with the last axis moved to the front
        return np.moveaxis(coord.bounds, -1, 0)

    data = listify(data)
    metadata = listify(metadata)

    # The order of these checks is significant: a plain list is tested for first.
    if isinstance(coords, list):
        coord_list = CoordList(coords)
    elif isinstance(coords, CoordList):
        coord_list = coords
    elif isinstance(coords, Coord):
        coord_list = CoordList([coords])
    else:
        raise ValueError("Invalid Coords type")
    self._coords = coord_list

    # Throw out points where any coordinate is masked
    bad_points = np.zeros(data[0].shape, dtype=bool)
    for coord in self._coords:
        bad_points |= getmask(coord.data)
        for edge in bounds_by_edge(coord):
            bad_points |= getmask(edge)
        coord.update_shape()
        coord.update_range()

    if bad_points.any():
        keep = np.logical_not(bad_points)
        data = [variable[keep] for variable in data]
        for coord in self._coords:
            coord.data = coord.data[keep]
            kept_bounds = np.array([edge[keep] for edge in bounds_by_edge(coord)])
            coord.bounds = np.moveaxis(kept_bounds, 0, -1)
            coord.update_shape()
            coord.update_range()

    super(UngriddedCube, self).__init__(zip(data, metadata))
def create_dummy_coordinates_list():
    """
    Create a CoordList of two dummy coordinates: a 'grid_latitude' on the Y axis followed by a
    'grid_longitude' on the X axis, each holding the values [5, 4].

    :return: CoordList holding the two coordinates
    """
    lat_coord = Coord(numpy.array([5, 4]), Metadata(standard_name='grid_latitude'), axis='Y')
    lon_coord = Coord(numpy.array([5, 4]), Metadata(standard_name='grid_longitude'), axis='X')
    dummy_coords = [lat_coord, lon_coord]
    return CoordList(dummy_coords)
def _create_coord_list(self, filenames):
    """
    Build the latitude, longitude, (optional) height and time coordinates from the given files.

    If reading with 'Height' raises InvalidVariableError the files are read again without it and no
    height coordinate is produced. When height is available, latitude, longitude and time are expanded
    to 2D along axis 1 using the length of the first row of the height data.

    :param filenames: List of files to read
    :return: CoordList holding latitude, longitude, height (if present) and time, in that order
    """
    from cis.time_util import cis_standard_time_unit

    # list of coordinate variables we are interested in
    variables = ['Latitude', 'Longitude', 'TAI_start', 'Profile_time', 'Height']

    # reading the various files
    try:
        logging.info("Listing coordinates: " + str(variables))
        sdata, vdata = hdf.read(filenames, variables)

        # altitude coordinate
        height = sdata['Height']
        height_data = hdf.read_data(height, "SD")
        height_metadata = hdf.read_metadata(height, "SD")
        height_coord = Coord(height_data, height_metadata, "Y")

    except InvalidVariableError:
        # This means we are reading a Cloudsat file without height, so remove height from the variables list
        variables.remove('Height')
        logging.info("Listing coordinates: " + str(variables))
        sdata, vdata = hdf.read(filenames, variables)

        height_data = None
        height_coord = None

    def expand_if_profiled(values):
        # Expand 1D values against the height dimension, but only when height data was read
        if height_data is not None:
            return utils.expand_1d_to_2d_array(values, len(height_data[0]), axis=1)
        return values

    def horizontal_coord(vd_name):
        # Build a coordinate from a "VD" variable, recording the (possibly expanded) shape in its metadata
        var = vdata[vd_name]
        values = expand_if_profiled(hdf.read_data(var, "VD"))
        meta = hdf.read_metadata(var, "VD")
        meta.shape = values.shape
        return Coord(values, meta)

    # latitude
    lat_coord = horizontal_coord('Latitude')

    # longitude
    lon_coord = horizontal_coord('Longitude')

    # time coordinate
    time_data = expand_if_profiled(self._generate_time_array(vdata))
    time_metadata = Metadata(name='Profile_time', standard_name='time', shape=time_data.shape,
                             units=str(cis_standard_time_unit),
                             calendar=cis_standard_time_unit.calendar)
    time_coord = Coord(time_data, time_metadata, "X")

    # create object containing list of coordinates
    coords = CoordList()
    coords.append(lat_coord)
    coords.append(lon_coord)
    if height_coord is not None:
        coords.append(height_coord)
    coords.append(time_coord)

    return coords
def _create_coord_list(self, filenames, data=None):
    """
    Build the longitude, latitude, altitude and time coordinates from loaded data.

    :param filenames: List of files to load when no data is supplied
    :param data: Already loaded data providing the 'longitude', 'latitude', 'altitude' and 'datetime'
        entries; when None the files are loaded with load_multiple_aeronet
    :return: CoordList holding the longitude, latitude, altitude and time coordinates, in that order
    """
    from cis.data_io.ungridded_data import Metadata
    from cis.data_io.aeronet import load_multiple_aeronet
    from cis.time_util import cis_standard_time_unit as ct

    if data is None:
        data = load_multiple_aeronet(filenames)

    # Every coordinate shares the same one-dimensional shape
    n_points = (len(data),)

    lon_meta = Metadata(name="Longitude", shape=n_points, units="degrees_east", range=(-180, 180))
    lat_meta = Metadata(name="Latitude", shape=n_points, units="degrees_north", range=(-90, 90))
    alt_meta = Metadata(name="Altitude", shape=n_points, units="meters")
    time_meta = Metadata(name="DateTime", standard_name='time', shape=n_points, units=ct)

    coords = CoordList()
    coords.append(Coord(data['longitude'], lon_meta))
    coords.append(Coord(data['latitude'], lat_meta))
    coords.append(Coord(data['altitude'], alt_meta))
    coords.append(Coord(data["datetime"], time_meta, "X"))

    return coords
def _create_coord_list(self):
    """
    Build flattened latitude, longitude and time coordinates from every file in self.filenames.

    For each file the grid centres come from self._read_grid_centres when self.grid_path is set and
    from self._calculate_grid_centres otherwise. Only the points for which the negated result of
    self._read_qcmask is true are kept. self._qcmask is then reset to an all-False array with the
    shape of the latitude coordinate.

    :return: CoordList holding the latitude, longitude and time coordinates, in that order
    :raises NotImplementedError: If no points are kept from any file
    """
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import Metadata
    from cis.time_util import convert_sec_since_to_std_time
    from os.path import basename

    kept_lat = []
    kept_lon = []
    kept_time = []

    for fname in self.filenames:
        var_name = self.gdal_variable_name(fname, "Optical_Depth_055")

        if self.grid_path:
            granule = basename(fname).split(".")[2]
            lat_data, lon_data = self._read_grid_centres(granule)
        else:
            lat_data, lon_data = self._calculate_grid_centres(var_name)
        time_data = self._calculate_grid_time(var_name, lat_data, lon_data)

        # Workaround files containing only one day
        stacked_shape = (-1, ) + lat_data.shape
        time_slices = time_data.reshape(stacked_shape)
        keep_slices = np.logical_not(self._read_qcmask(fname)).reshape(stacked_shape)

        for time_slice, keep_slice in zip(time_slices, keep_slices):
            kept_lat.extend(lat_data[keep_slice])
            kept_lon.extend(lon_data[keep_slice])
            kept_time.extend(time_slice[keep_slice])

    if not kept_lat:
        raise NotImplementedError("It's empty!")

    lat_meta = Metadata(name="lat", standard_name="latitude", units="degrees", range=(-90., 90.))
    lat = Coord(np.ma.array(kept_lat), lat_meta, "Y")
    lat.update_shape()

    lon_meta = Metadata(name="lon", standard_name="longitude", units="degrees", range=(-180., 180.))
    lon = Coord(np.ma.array(kept_lon), lon_meta, "X")
    lon.update_shape()

    time_meta = Metadata(name="time", standard_name="time", units="Seconds since 1993-1-1 00:00:00.0 0")
    time = Coord(np.ma.array(kept_time), time_meta, "T")
    time.convert_TAI_time_to_std_time(MODIS_REFERENCE_TIME)
    time.update_shape()

    # Set the QC mask as we now know how many points we have
    self._qcmask = np.full(lat.shape, False)

    return CoordList([lat, lon, time])
def __init__(self, coords):
    """
    Constructor

    :param coords: The associated coordinates: a list of Coord objects, a CoordList, or a single Coord
    :raises ValueError: If coords is none of the accepted types
    """
    from cis.data_io.Coord import CoordList, Coord

    # The order of these checks is significant: a plain list is tested for first.
    if isinstance(coords, list):
        coord_list = CoordList(coords)
    elif isinstance(coords, CoordList):
        coord_list = coords
    elif isinstance(coords, Coord):
        coord_list = CoordList([coords])
    else:
        raise ValueError("Invalid Coords type")
    self._coords = coord_list

    self._post_process()

    # Flattened data for each standard coordinate, with None kept for the ones that were not found
    flattened = []
    for coord in self._coords.find_standard_coords():
        flattened.append(None if coord is None else coord.data_flattened)
    self.coords_flattened = flattened
def setUp(self):
    """Create an UngriddedCoordinates object from a pair of 5x3 latitude/longitude coordinates."""
    x_points = np.arange(-10, 11, 5)
    y_points = np.arange(-5, 6, 5)
    y_grid, x_grid = np.meshgrid(y_points, x_points)

    lat_meta = Metadata(standard_name='latitude', units='degrees')
    lon_meta = Metadata(standard_name='longitude', units='degrees')
    self.x = Coord(x_grid, lat_meta)
    self.y = Coord(y_grid, lon_meta)

    self.ug = UngriddedCoordinates(CoordList([self.x, self.y]))
def create_coords(self, filenames, variable=None):
    """
    Read latitude, longitude, altitude and time coordinates, and optionally the value column, from
    comma-delimited text files.

    Each file is parsed with numpy.genfromtxt into the columns latitude, longitude, altitude, time
    and value; the time column is parsed with dateutil and converted to the standard time.

    :param filenames: List of files to read
    :param variable: If truthy, the 'value' column is returned as data with the coordinates
    :return: UngriddedData when variable is truthy, otherwise UngriddedCoordinates
    :raises IOError: If any of the files cannot be read
    :raises InvalidVariableError: If the data object cannot be built from the value column
    """
    from cis.data_io.ungridded_data import Metadata
    # numpy.nan is used because the numpy.NaN alias was removed in NumPy 2.0
    from numpy import genfromtxt, nan
    from cis.exceptions import InvalidVariableError
    from cis.time_util import convert_datetime_to_std_time
    import dateutil.parser as du

    array_list = []

    for filename in filenames:
        try:
            array_list.append(genfromtxt(filename, dtype="f8,f8,f8,O,f8",
                                         names=['latitude', 'longitude', 'altitude', 'time', 'value'],
                                         delimiter=',', missing_values='', usemask=True, invalid_raise=True,
                                         converters={"time": du.parse}))
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt/SystemExit are not converted
            raise IOError('Unable to read file ' + filename)

    data_array = utils.concatenate(array_list)
    n_elements = len(data_array['latitude'])

    coords = CoordList()
    coords.append(Coord(data_array["latitude"],
                        Metadata(standard_name="latitude", shape=(n_elements,), units="degrees_north")))
    coords.append(Coord(data_array["longitude"],
                        Metadata(standard_name="longitude", shape=(n_elements,), units="degrees_east")))
    coords.append(
        Coord(data_array["altitude"], Metadata(standard_name="altitude", shape=(n_elements,), units="meters")))

    time_arr = convert_datetime_to_std_time(data_array["time"])
    time = Coord(time_arr,
                 Metadata(standard_name="time", shape=(n_elements,), units="days since 1600-01-01 00:00:00"))
    coords.append(time)

    if variable:
        try:
            data = UngriddedData(data_array['value'],
                                 Metadata(name="value", shape=(n_elements,), units="unknown", missing_value=nan),
                                 coords)
        except Exception:
            # The error has to be raised, not merely constructed, and filenames is a list so it must
            # be converted to a string before being joined to the message.
            raise InvalidVariableError("Value column does not exist in file " + str(filenames))
        return data
    else:
        return UngriddedCoordinates(coords)
def create_coords(self, filenames, usr_variable=None):
    """
    Read whichever of the 'lon', 'lat', 'alt', 'time' and 'p' variables can be read from the files,
    and optionally a data variable.

    Each coordinate has its standard name set explicitly and any 'name' entry removed from its
    metadata's misc attributes; the 'z' axis coordinate is converted to units of 'm'. Variables that
    raise InvalidVariableError are skipped.

    :param filenames: List of files to read
    :param usr_variable: Name of a data variable to load with the coordinates, or None for coordinates only
    :return: UngriddedCoordinates when usr_variable is None, otherwise an UngriddedData object
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.data_io.Coord import Coord, CoordList
    from cis.exceptions import InvalidVariableError

    variables = [("lon", "x", 'longitude'), ("lat", "y", 'latitude'), ("alt", "z", 'altitude'),
                 ("time", "t", 'time'), ("p", "p", 'air_pressure')]

    logging.info("Listing coordinates: " + str(variables))

    coords = CoordList()
    for var_name, axis, std_name in variables:
        try:
            values = read_many_files_individually(filenames, var_name)[var_name]
            meta = get_metadata(values[0])
            meta.standard_name = std_name
            # Some of the variables have an illegal name attribute...
            meta.misc.pop('name', None)
            coord = Coord(values, meta, axis=axis)
            if axis == 'z':
                coord.convert_units('m')
            coords.append(coord)
        except InvalidVariableError:
            pass

    # Note - We don't need to convert this time coord as it should have been written in our
    #        'standard' time unit

    if usr_variable is None:
        return UngriddedCoordinates(coords)

    usr_values = read_many_files_individually(filenames, usr_variable)[usr_variable]
    usr_meta = get_metadata(usr_values[0])
    # Some of the variables have an illegal name attribute...
    usr_meta.misc.pop('name', None)
    return UngriddedData(usr_values, usr_meta, coords)
def _create_coord_list(self, filenames):
    """
    Build the longitude, latitude and time coordinates from the "lon", "lat" and "time" file variables.

    The files are read with ``dim="pixel_number"`` and the time coordinate is converted with
    ``convert_TAI_time_to_std_time`` using a 1970-01-01 reference.

    :param filenames: The files to read
    :return: CoordList holding the longitude, latitude and time coordinates, in that order
    """
    import datetime
    from cis.data_io.Coord import Coord
    from cis.data_io.netcdf import read_many_files, get_metadata

    # FIXME: when reading an existing file variables might be "latitude", "longitude"
    variables = ["lat", "lon", "time"]
    logging.info("Listing coordinates: " + str(variables))

    data = read_many_files(filenames, variables, dim="pixel_number")

    lon_coord = Coord(data["lon"], get_metadata(data["lon"]), "X")
    lat_coord = Coord(data["lat"], get_metadata(data["lat"]), "Y")
    time_coord = Coord(data["time"], get_metadata(data["time"]), "T")
    time_coord.convert_TAI_time_to_std_time(datetime.datetime(1970, 1, 1))

    coords = CoordList()
    for coord in (lon_coord, lat_coord, time_coord):
        coords.append(coord)
    return coords
def _create_coord_list(self, filenames):
    """
    Build the latitude, longitude and time coordinates from the "lat", "lon" and "time" file variables.

    The metadata of each coordinate is taken from the first file's variable and the time coordinate is
    converted with ``convert_julian_to_std_time``.

    :param filenames: The files to read
    :return: CoordList holding the latitude, longitude and time coordinates, in that order
    """
    from cis.data_io.Coord import Coord
    from cis.data_io.netcdf import read_many_files_individually, get_metadata

    variables = ["lat", "lon", "time"]
    logging.info("Listing coordinates: " + str(variables))

    var_data = read_many_files_individually(filenames, variables)

    coords = CoordList()
    for name, axis in (('lat', 'Y'), ('lon', 'X')):
        coords.append(Coord(var_data[name], get_metadata(var_data[name][0]), axis))

    # The time coordinate is created without an explicit axis
    time_coord = Coord(var_data['time'], get_metadata(var_data['time'][0]))

    # TODO: Is this really julian?
    time_coord.convert_julian_to_std_time()
    coords.append(time_coord)
    return coords
def _create_coord_list(self, filenames):
    """
    Build the longitude, latitude and time coordinates, accepting either short or long variable names.

    The variables "lon", "lat" and "time" are tried first; if reading them raises InvalidVariableError the
    names "longitude", "latitude" and "time" are read instead. The time coordinate is passed through
    ``self._fix_time`` before being added.

    :param filenames: The files to read
    :return: CoordList holding the longitude, latitude and time coordinates, in that order
    """
    from cis.exceptions import InvalidVariableError
    from cis.data_io.Coord import Coord
    from cis.data_io.netcdf import read_many_files_individually, get_metadata

    try:
        variables = ["lon", "lat", "time"]
        data = read_many_files_individually(filenames, variables)
    except InvalidVariableError:
        variables = ["longitude", "latitude", "time"]
        data = read_many_files_individually(filenames, variables)

    logging.info("Listing coordinates: " + str(variables))

    lon_name, lat_name, time_name = variables

    coords = CoordList()
    coords.append(Coord(data[lon_name], get_metadata(data[lon_name][0]), "X"))
    coords.append(Coord(data[lat_name], get_metadata(data[lat_name][0]), "Y"))
    time_coord = Coord(data[time_name], get_metadata(data[time_name][0]), "T")
    coords.append(self._fix_time(time_coord))
    return coords
def _create_coord_list(self, filenames, data=None):
    """
    Build the longitude, latitude, altitude and time coordinates for Aeronet data.

    :param filenames: The files to load when ``data`` is not supplied
    :param data: Optional already loaded data, indexable by 'longitude', 'latitude', 'altitude' and 'datetime'
    :return: CoordList holding the four coordinates in the order listed above
    """
    from cis.time_util import cis_standard_time_unit as ct
    from cis.data_io.aeronet import load_multiple_aeronet
    from cis.data_io.ungridded_data import Metadata

    if data is None:
        data = load_multiple_aeronet(filenames)

    # Every coordinate is described with the same shape
    shape = (len(data),)

    lon_meta = Metadata(name="Longitude", shape=shape, units="degrees_east", range=(-180, 180))
    lat_meta = Metadata(name="Latitude", shape=shape, units="degrees_north", range=(-90, 90))
    alt_meta = Metadata(name="Altitude", shape=shape, units="meters")
    time_meta = Metadata(name="DateTime", standard_name='time', shape=shape, units=str(ct))

    coords = CoordList()
    coords.append(Coord(data['longitude'], lon_meta))
    coords.append(Coord(data['latitude'], lat_meta))
    coords.append(Coord(data['altitude'], alt_meta))
    # The time coordinate is the only one given an axis, and that axis is "X"
    coords.append(Coord(data["datetime"], time_meta, "X"))
    return coords
def create_coords(self, filenames):
    """
    Read the "longitude", "latitude" and "time" variables of the given files as coordinates.

    :param filenames: The files to read
    :return: UngriddedCoordinates built from the latitude, longitude and time coordinates (in that order)
    """
    from cis.data_io.ungridded_data import UngriddedCoordinates
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.netcdf import read_many_files_individually, get_metadata

    var_data = read_many_files_individually(filenames, ["longitude", "latitude", "time"])

    def build(name, axis):
        # Metadata is taken from the variable of the first file
        file_vars = var_data[name]
        return Coord(file_vars, get_metadata(file_vars[0]), axis=axis)

    lon = build("longitude", "x")
    lat = build("latitude", "y")
    time = build("time", "t")

    return UngriddedCoordinates(CoordList([lat, lon, time]))
def _create_coord_list(self, filenames, data=None):
    """
    Build the longitude, latitude and time coordinates.

    When ``data`` is not supplied the 'longitude', 'latitude' and 'time' variables are read from the first
    file only, and the times are re-expressed from 'days since 1990-01-01 00:00:00' to
    'days since 1600-01-01 00:00:00'.

    :param filenames: List of files; only ``filenames[0]`` is opened
    :param data: Optional mapping which already holds 'longitude', 'latitude' and 'time'
    :return: CoordList holding the longitude, latitude and time coordinates, in that order
    """
    if data is None:
        data = {}
        inData = netCDF4.Dataset(filenames[0])
        try:
            data['longitude'] = np.array(inData.variables['longitude'])
            data['latitude'] = np.array(inData.variables['latitude'])
            origTimes = np.array(inData.variables['time'])
            # Convert to days since 1600-01-01 (cis col doesn't work otherwise - not sure why...):
            niceDateTime = cf_units.num2date(origTimes, 'days since 1990-01-01 00:00:00', 'gregorian')
            data['time'] = cf_units.date2num(niceDateTime, 'days since 1600-01-01 00:00:00', 'gregorian')
        finally:
            # Close the file even if a variable is missing or the conversion fails
            inData.close()

    # NOTE(review): len(data) is the number of entries in ``data`` (its keys when it is a dict), which is
    # probably not the number of points - confirm whether the shape is recomputed by Coord.
    shape = (len(data), )

    coords = CoordList()
    coords.append(
        Coord(data['longitude'],
              Metadata(name="longitude", long_name='longitude', standard_name='longitude', shape=shape,
                       missing_value=-999.0, units="degrees_east", range=(-180, 180)),
              "x"))
    coords.append(
        Coord(data['latitude'],
              Metadata(name="latitude", long_name='latitude', standard_name='latitude', shape=shape,
                       missing_value=-999.0, units="degrees_north", range=(-90, 90)),
              "y"))
    coords.append(
        Coord(data['time'],
              Metadata(name="time", long_name='time', standard_name='time', shape=shape,
                       missing_value=-999.0, units="days since 1600-01-01 00:00:00"),
              "t"))
    return coords
def _create_bounded_coord_list(self):
    """
    Build latitude, longitude and time coordinates, each with bounds, from ``self.filenames``.

    Each file is opened as a ``MODIS`` product. Latitude and longitude bounds are taken from 2x2 windows
    over the product's corner arrays (four values per point); time bounds pair each time with the next one.

    :return: CoordList holding the latitude, longitude and time coordinates, in that order
    """
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import Metadata
    from cis.time_util import cis_standard_time_unit as cstu
    # These implement a lot of what is necessary, but aren't in CIS style
    from acp_utils import rolling_window
    from orbit import MODIS

    lat_data = []
    lat_bounds = []
    lon_data = []
    lon_bounds = []
    time_data = []
    time_bounds = []
    for fname in self.filenames:
        prod = MODIS(fname)

        lat_data.append(prod.lat)
        lon_data.append(prod.lon)
        lat_c = rolling_window(prod.lat_corner, (2, 2))
        lat_bounds.append(lat_c.reshape(prod.shape + (4, )))
        lon_c = rolling_window(prod.lon_corner, (2, 2))
        lon_bounds.append(lon_c.reshape(prod.shape + (4, )))

        t = prod.get_time()
        time_data.append(t)
        # NOTE(review): np.roll without an axis shifts the flattened array; the extrapolation of the last
        # row below suggests a shift along axis 0 may have been intended - confirm.
        b = np.stack([t, np.roll(t, -1)], axis=2)
        # The upper bound of the last row is extrapolated from the last two rows
        b[-1, :, 1] = 2 * t[-1, :] - t[-2, :]
        time_bounds.append(b)

    # TODO: Properly define metadata
    lat_meta = Metadata(standard_name="latitude", units="degrees")
    lon_meta = Metadata(standard_name="longitude", units="degrees")
    time_meta = Metadata(standard_name="time", units=cstu)

    lat = Coord(concatenate(lat_data), lat_meta, "Y")
    lat.update_shape()
    lat.update_range()
    lat.bounds = concatenate(lat_bounds).reshape(lat.shape + (4, ))
    # Swap the last two of the four bound values
    lat.bounds[..., 2:4] = lat.bounds[..., [3, 2]]

    # Longitude belongs on the X axis; it was previously created with "Y", duplicating the latitude axis.
    lon = Coord(concatenate(lon_data), lon_meta, "X")
    lon.update_shape()
    lon.update_range()
    lon.bounds = concatenate(lon_bounds).reshape(lon.shape + (4, ))
    lon.bounds[..., 2:4] = lon.bounds[..., [3, 2]]

    time = Coord(concatenate(time_data), time_meta, "T")
    time.update_shape()
    time.update_range()
    time.bounds = concatenate(time_bounds)

    return CoordList([lat, lon, time])
def create_coords(self, filenames, usr_variable=None):
    """
    Read whichever of the standard coordinate variables are present in the files.

    The variables of the first file are listed, and those named longitude, latitude, altitude, time or
    air_pressure are read from all the files as coordinates.

    :param filenames: The files to read
    :param usr_variable: Optional name of a data variable to read alongside the coordinates
    :return: UngriddedCoordinates if ``usr_variable`` is None, otherwise UngriddedData for that variable
    """
    from cis.exceptions import InvalidVariableError
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.netcdf import read_many_files_individually, get_metadata, get_netcdf_file_variables

    # We have to read it once first to find out which variables are in there. We assume the set of coordinates in
    # all the files are the same
    file_variables = get_netcdf_file_variables(filenames[0])

    axis_lookup = {"longitude": "x", 'latitude': 'y', 'altitude': 'z', 'time': 't', 'air_pressure': 'p'}

    coord_variables = []
    for file_var in file_variables:
        if file_var in axis_lookup:
            coord_variables.append((file_var, axis_lookup[file_var]))

    # Everything which has to be read: the coordinates plus, optionally, the requested variable
    all_variables = list(coord_variables)
    if usr_variable is not None:
        all_variables.append((usr_variable, ''))

    logging.info("Listing coordinates: " + str(all_variables))

    names_to_read = [entry[0] for entry in all_variables]
    var_data = read_many_files_individually(filenames, names_to_read)

    coords = CoordList()
    for name, axis in coord_variables:
        try:
            file_vars = var_data[name]
            coords.append(Coord(file_vars, get_metadata(file_vars[0]), axis=axis))
        except InvalidVariableError:
            continue

    # Note - We don't need to convert this time coord as it should have been written in our
    #        'standard' time unit

    if usr_variable is None:
        return UngriddedCoordinates(coords)

    usr_var_data = var_data[usr_variable]
    return UngriddedData(usr_var_data, get_metadata(usr_var_data[0]), coords)
def _create_one_dimensional_coord_list(self, filenames):
    """
    Build latitude, longitude and time coordinates from the MODIS_latitude, MODIS_longitude, TAI_start and
    Profile_time variables of the given HDF files.

    :param filenames: The files to read
    :return: CoordList holding the latitude, longitude and time coordinates, in that order
    """
    from cis.time_util import cis_standard_time_unit

    # list of coordinate variables we are interested in
    variables = ['MODIS_latitude', 'MODIS_longitude', 'TAI_start', 'Profile_time']

    # reading the various files
    logging.info("Listing coordinates: " + str(variables))
    sdata, vdata = hdf.read(filenames, variables)

    # latitude
    lat_sds = sdata['MODIS_latitude']
    lat_values = hdf.read_data(lat_sds, self._get_cloudsat_sds_data)
    lat_meta = hdf.read_metadata(lat_sds, "SD")
    lat_meta.shape = lat_values.shape
    lat_meta.standard_name = 'latitude'
    lat_coord = Coord(lat_values, lat_meta)

    # longitude
    lon_sds = sdata['MODIS_longitude']
    lon_values = hdf.read_data(lon_sds, self._get_cloudsat_sds_data)
    lon_meta = hdf.read_metadata(lon_sds, "SD")
    lon_meta.shape = lon_values.shape
    lon_meta.standard_name = 'longitude'
    lon_coord = Coord(lon_values, lon_meta)

    # time coordinate, which is the only one created with an axis ("X")
    time_values = self._generate_time_array(vdata)
    time_meta = Metadata(name='Profile_time', standard_name='time', shape=time_values.shape,
                         units=cis_standard_time_unit)
    time_coord = Coord(time_values, time_meta, "X")

    # create object containing list of coordinates
    coords = CoordList()
    for coord in (lat_coord, lon_coord, time_coord):
        coords.append(coord)
    return coords
def _create_coordinates_list(self, data_variables, variable_selector):
    """
    Create a co-ordinate list for the data

    Time is always added first, followed by latitude and longitude, altitude and (only when the selector
    names a pressure variable) pressure. Latitude/longitude and altitude are created either from file
    variables or as fixed values, depending on the selector.

    :param data_variables: the load data
    :param variable_selector: the variable selector for the data
    :return: a list of coordinates
    """
    coords = CoordList()

    # Time
    time_coord = self._create_time_coord(variable_selector.time_stamp_info,
                                         variable_selector.time_variable_name,
                                         data_variables)
    coords.append(time_coord)

    # Lat and Lon
    # Multiple points counts for multiple files
    # np.prod replaces np.product, which was deprecated and then removed in NumPy 2.0
    points_count = [np.prod(var.shape) for var in data_variables[variable_selector.time_variable_name]]
    if variable_selector.station:
        lat_coord = self._create_fixed_value_coord("Y", variable_selector.station_latitude, "degrees_north",
                                                   points_count, "latitude")
        lon_coord = self._create_fixed_value_coord("X", variable_selector.station_longitude, "degrees_east",
                                                   points_count, "longitude")
    else:
        lat_coord = self._create_coord("Y", variable_selector.latitude_variable_name, data_variables, "latitude")
        lon_coord = self._create_coord("X", variable_selector.longitude_variable_name, data_variables, "longitude")
    coords.append(lat_coord)
    coords.append(lon_coord)

    # Altitude
    if variable_selector.altitude is None:
        altitude_coord = self._create_coord("Z", variable_selector.altitude_variable_name, data_variables,
                                            "altitude")
    else:
        altitude_coord = self._create_fixed_value_coord("Z", variable_selector.altitude, "meters", points_count,
                                                        "altitude")
    coords.append(altitude_coord)

    # Pressure
    if variable_selector.pressure_variable_name is not None:
        coords.append(
            self._create_coord("P", variable_selector.pressure_variable_name, data_variables, "air_pressure"))
    return coords
def create_data_object(self, filenames, variable):
    """
    Read a data variable together with the "longitude", "latitude" and "time" coordinates.

    :param filenames: The files to read
    :param variable: Name of the data variable to read
    :return: UngriddedData for ``variable`` with latitude, longitude and time coordinates (in that order)
    """
    from cis.data_io.ungridded_data import UngriddedData
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.netcdf import read_many_files_individually, get_metadata

    var_data = read_many_files_individually(filenames, ["longitude", "latitude", "time", variable])

    def build(name, axis):
        # Metadata is taken from the variable of the first file
        file_vars = var_data[name]
        return Coord(file_vars, get_metadata(file_vars[0]), axis=axis)

    lon = build("longitude", "x")
    lat = build("latitude", "y")
    time = build("time", "t")
    coords = CoordList([lat, lon, time])

    usr_var_data = var_data[variable]
    usr_meta = get_metadata(usr_var_data[0])
    return UngriddedData(usr_var_data, usr_meta, coords)
def create_coords(self, filenames, usr_variable=None):
    """
    Read the listed variables from the given files as coordinates.

    Variables which raise InvalidVariableError while being read are left out.

    :param filenames: The files to read
    :param usr_variable: Accepted but not used by this implementation
    :return: UngriddedCoordinates holding every variable which could be read
    """
    from cis.exceptions import InvalidVariableError
    from cis.data_io.ungridded_data import UngriddedCoordinates
    from cis.data_io.Coord import Coord, CoordList

    # NOTE(review): read_many_files_individually and get_metadata are not imported in this function;
    # presumably they are imported at module level - confirm.
    # NOTE(review): the second element is passed to Coord as the axis; for the backscatter entry it looks
    # like a unit string rather than an axis - confirm this is intended.
    variables = [("longitude", "x"), ("latitude", "y"), ("altitude", "z"), ("time", "t"),
                 ("aerosol_backscatter_coefficient", "m-1 sr-1")]

    logging.info("Listing coordinates: " + str(variables))

    coords = CoordList()
    for name, axis in variables:
        try:
            file_vars = read_many_files_individually(filenames, name)[name]
            coords.append(Coord(file_vars, get_metadata(file_vars[0]), axis=axis))
        except InvalidVariableError:
            continue

    return UngriddedCoordinates(coords)
def create_coords(self, filenames, usr_variable=None):
    """
    Read the listed variables from the given files as coordinates.

    Variables which raise InvalidVariableError while being read are left out.

    :param filenames: The files to read
    :param usr_variable: Accepted but not used by this implementation
    :return: UngriddedCoordinates holding every variable which could be read
    """
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates
    from cis.exceptions import InvalidVariableError

    # Every entry must be a (variable name, axis) pair. The last three entries used to be written as
    # ("wind_speed") etc., which is a plain string, so indexing them yielded single characters ('w', 'i')
    # instead of the variable name and an axis. They are now real pairs with an empty axis.
    # NOTE(review): "air_temprature" and "Wind Diretion" look misspelt; they are left untouched because
    # they must match the names used in the data files - confirm against a sample file.
    # NOTE(review): "RH", "Pa" and "K" are passed to Coord as the axis although they look like units.
    variables = [("longitude", "x"), ("latitude", "y"), ("altitude", "z"), ("time", "t"),
                 ("relative_humidity", "RH"), ("surface_air_pressure", "Pa"), ("air_temprature", "K"),
                 ("wind_speed", ""), ("Wind Diretion", ""), ("rainfall_rate", "")]

    logging.info("Listing coordinates: " + str(variables))

    coords = CoordList()
    for name, axis in variables:
        try:
            var_data = read_many_files_individually(filenames, name)[name]
            coords.append(Coord(var_data, get_metadata(var_data[0]), axis=axis))
        except InvalidVariableError:
            # Best effort: a variable which is absent from the files is simply not offered as a coordinate
            pass

    return UngriddedCoordinates(coords)
def _create_coord_list(self, filenames, variable=None):
    """
    Build latitude, longitude and time coordinates from the Latitude, Longitude and View_time datasets.

    :param filenames: The HDF files to read
    :param variable: Optional data variable; when its scale (looked up in the first file) is "1km" the
      latitude and longitude are interpolated with ``modis5kmto1km``
    :return: CoordList holding the latitude, longitude and time coordinates, in that order
    """
    import datetime as dt
    from cis.time_util import cis_standard_time_unit
    from cis.utils import concatenate
    from geotiepoints import modis5kmto1km

    variables = ['Latitude', 'Longitude', 'View_time']
    logging.info("Listing coordinates: " + str(variables))

    sdata, _vdata = hdf.read(filenames, variables)

    apply_interpolation = False
    if variable is not None:
        scale = self.__get_data_scale(filenames[0], variable)
        # Compare by value: ``is`` tests object identity, which is not guaranteed for equal strings
        apply_interpolation = scale == "1km"

    lat_data = hdf.read_data(sdata['Latitude'], _get_MODIS_SDS_data)
    lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")

    lon_data = hdf.read_data(sdata['Longitude'], _get_MODIS_SDS_data)
    lon_metadata = hdf.read_metadata(sdata['Longitude'], "SD")

    if apply_interpolation:
        lon_data, lat_data = modis5kmto1km(lon_data, lat_data)

    lat_coord = Coord(lat_data, lat_metadata, 'Y')
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    time = sdata['View_time']
    time_metadata = hdf.read_metadata(time, "SD")
    # Ensure the standard name is set
    time_metadata.standard_name = 'time'
    time_metadata.units = cis_standard_time_unit

    t_arrays = []
    for f, d in zip(filenames, time):
        time_start = self._get_start_date(f)
        t_data = _get_MODIS_SDS_data(d) / 24.0  # Convert hours since to days since
        t_offset = time_start - dt.datetime(1600, 1, 1)  # Convert to CIS time
        # NOTE(review): only whole days of the offset are added; any time-of-day in time_start is dropped
        t_arrays.append(t_data + t_offset.days)

    time_coord = Coord(concatenate(t_arrays), time_metadata, "T")

    return CoordList([lat_coord, lon_coord, time_coord])
def test_GIVEN_missing_coord_values_WHEN_data_flattened_THEN_missing_values_removed(self):
    """
    One of the 15 points has a masked coordinate value, so the flattened data should hold 14 values.
    """
    x_points = np.arange(-10, 11, 5)
    y_points = np.arange(-5, 6, 5)
    y_grid, x_grid = np.meshgrid(y_points, x_points)

    # Mask a single point of the second coordinate
    masked_y = np.ma.masked_array(y_grid, np.zeros(y_grid.shape, dtype=bool))
    masked_y.mask[1, 2] = True

    first = Coord(x_grid, Metadata(standard_name='latitude', units='degrees'))
    second = Coord(masked_y, Metadata(standard_name='longitude', units='degrees'))
    coord_list = CoordList([first, second])

    values = np.reshape(np.arange(15) + 1.0, (5, 3))

    ug = UngriddedData(None, Metadata(), coord_list, lambda x: values)
    flattened = ug.data_flattened

    assert_that(len(flattened), is_(14))
def test_GIVEN_missing_coord_values_WHEN_coords_THEN_missing_values_removed(self):
    """
    One of the 15 points has a masked coordinate value, so every coordinate should hold 14 values.
    """
    x_points = np.arange(-10, 11, 5)
    y_points = np.arange(-5, 6, 5)
    y_grid, x_grid = np.meshgrid(y_points, x_points)

    # Mask a single point of the second coordinate
    masked_y = np.ma.masked_array(y_grid, np.zeros(y_grid.shape, dtype=bool))
    masked_y.mask[1, 2] = True

    first = Coord(x_grid, Metadata(standard_name='latitude', units='degrees'))
    second = Coord(masked_y, Metadata(standard_name='longitude', units='degrees'))

    ug = UngriddedCoordinates(CoordList([first, second]))

    for remaining in ug.coords():
        assert_that(len(remaining.data), is_(14))
def _create_coord_list(self, filenames, data=None):
    """
    Build the time, pressure, latitude, longitude and altitude coordinates for HYSPLIT data.

    :param filenames: The files to load when ``data`` is not supplied
    :param data: Optional already loaded data, indexable by 'DATETIMES', 'PRESSURE', 'LAT', 'LON' and 'ALT'
    :return: CoordList holding the five coordinates in the order listed above
    """
    from cis.time_util import cis_standard_time_unit as ct
    from cis.data_io.ungridded_data import Metadata
    import numpy as np

    if data is None:
        data = load_multiple_hysplit(filenames)  # TODO error handling

    def shape_of(key):
        # One-dimensional shape matching the length of the named column
        return (len(data[key]), )

    lat_meta = Metadata(standard_name="latitude", shape=shape_of('LAT'), units="degrees_north", range=(-90, 90))
    lon_meta = Metadata(standard_name="longitude", shape=shape_of('LON'), units="degrees_east",
                        range=(-180, 180))
    alt_meta = Metadata(standard_name="altitude", shape=shape_of('ALT'), units="m")
    time_meta = Metadata(standard_name="time", shape=shape_of('DATETIMES'), units=str(ct))
    press_meta = Metadata(standard_name="air_pressure", shape=shape_of('PRESSURE'), units="Pa")

    # The STARTING_TIME, STARTING_HEIGHT and STATION_NO columns are deliberately not turned into coordinates
    coords = CoordList()
    for key, meta in (('DATETIMES', time_meta), ('PRESSURE', press_meta), ('LAT', lat_meta),
                      ('LON', lon_meta), ('ALT', alt_meta)):
        coords.append(Coord(data[key], meta))
    return coords
def _create_one_dimensional_coord_list(self, filenames, index_offset=1):
    """
    Create a set of coordinates appropriate for a one-dimensional (column integrated) variable

    :param filenames: The HDF files to read
    :param int index_offset: For 5km products this will choose the coordinates which represent the start (0),
      middle (1) and end (2) of the 15 shots making up each column retrieval.
    :return: CoordList holding the latitude, longitude and time coordinates, in that order
    :raise IOError: If a file can not be read (wraps the underlying HDF4Error)
    """
    import datetime as dt
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

    variables = ['Latitude', 'Longitude', "Profile_Time"]
    logging.info("Listing coordinates: " + str(variables))

    # reading data from files; every variable collects one entry per file
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var, sds in list(sds_dict.items()):
            utils.add_element_to_list_in_dict(sdata, var, sds)

    # latitude
    lat_sds = sdata['Latitude']
    lat_values = hdf.read_data(lat_sds, self._get_calipso_data)[:, index_offset]
    lat_coord = Coord(lat_values, hdf.read_metadata(lat_sds, "SD"), 'Y')

    # longitude
    lon_sds = sdata['Longitude']
    lon_values = hdf.read_data(lon_sds, self._get_calipso_data)[:, index_offset]
    lon_coord = Coord(lon_values, hdf.read_metadata(lon_sds, "SD"), 'X')

    # profile time, converted from seconds since 1993-01-01 to the standard time
    time_sds = sdata['Profile_Time']
    time_values = hdf.read_data(time_sds, self._get_calipso_data)[:, index_offset]
    time_values = convert_sec_since_to_std_time(time_values, dt.datetime(1993, 1, 1, 0, 0, 0))
    time_meta = Metadata(name='Profile_Time', standard_name='time', shape=time_values.shape,
                         units=cis_standard_time_unit)
    time_coord = Coord(time_values, time_meta, "T")

    # create the object containing all coordinates
    coords = CoordList()
    for coord in (lat_coord, lon_coord, time_coord):
        coords.append(coord)
    return coords
def _create_coord_list(self, filenames, data=None):
    """
    Build the longitude, latitude and time coordinates.

    When ``data`` is not supplied the 'lon', 'lat' and 'time_counter' variables are read from the first
    file only, and the times are re-expressed from 'seconds since 1999-01-01 00:00:00' to
    'days since 1600-01-01 00:00:00'.

    :param filenames: List of files; only ``filenames[0]`` is opened
    :param data: Optional mapping which already holds 'longitude', 'latitude' and 'time'
    :return: CoordList holding the longitude, latitude and time coordinates, in that order
    """
    if data is None:
        data = {}
        inData = netCDF4.Dataset(filenames[0])
        try:
            data['longitude'] = np.array(inData.variables['lon'])
            data['latitude'] = np.array(inData.variables['lat'])
            origTimes = np.array(inData.variables['time_counter'])
            # Convert time to days since 1600-01-01
            niceDateTime = cf_units.num2date(origTimes, 'seconds since 1999-01-01 00:00:00', 'gregorian')
            # Stored under 'time' because that is the key read when the time coordinate is built below;
            # it used to be stored under 'time_counter', which made the later lookup fail with a KeyError.
            data['time'] = cf_units.date2num(niceDateTime, 'days since 1600-01-01 00:00:00', 'gregorian')
        finally:
            # Close the file even if a variable is missing or the conversion fails
            inData.close()

    # NOTE(review): len(data) is the number of entries in ``data`` (its keys when it is a dict), which is
    # probably not the number of points - confirm whether the shape is recomputed by Coord.
    shape = (len(data),)

    coords = CoordList()
    coords.append(Coord(data['longitude'],
                        Metadata(name="longitude", long_name='longitude', standard_name='longitude', shape=shape,
                                 missing_value=-999.0, units="degrees_east", range=(-180, 180)),
                        "x"))
    coords.append(Coord(data['latitude'],
                        Metadata(name="latitude", long_name='latitude', standard_name='latitude', shape=shape,
                                 missing_value=-999.0, units="degrees_north", range=(-90, 90)),
                        "y"))
    coords.append(Coord(data['time'],
                        Metadata(name="time", long_name='time', standard_name='time', shape=shape,
                                 missing_value=-999.0, units="days since 1600-01-01 00:00:00"),
                        "t"))
    return coords
def create_coords(self, filenames):
    """
    Unfinished placeholder for building longitude, latitude and time coordinates.

    ``var_data`` is hard-coded to None (see the FIXME), so the first ``var_data["longitude"]`` lookup
    fails before any coordinate is created. ``filenames`` is not read yet.

    :param filenames: The files the coordinates are meant to come from (currently unused)
    :return: Intended to return UngriddedCoordinates holding longitude, latitude and time
    """
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates, Metadata

    # FIXME
    # The data still has to be loaded here; until then the subscripting below can not succeed.
    var_data = None

    # NOTE(review): get_metadata is not imported in this function - presumably a module level import.
    lon = Coord(var_data["longitude"], get_metadata(var_data["longitude"][0]), axis="x")
    lat = Coord(var_data["latitude"], get_metadata(var_data["latitude"][0]), axis="y")
    time = Coord(var_data["time"], get_metadata(var_data["time"][0]), axis="t")
    coords = CoordList([lon, lat, time])

    return UngriddedCoordinates(coords)
def _create_coord_list(self, filenames, variable=None):
    """
    Build latitude, longitude and time coordinates from the Latitude, Longitude and Scan_Start_Time datasets.

    :param filenames: The HDF files to read
    :param variable: Optional data variable; when its scale (looked up in the first file) is "1km" the
      latitude and longitude are passed through ``self.__field_interpolate``
    :return: CoordList holding the latitude, longitude and time coordinates, in that order
    """
    import datetime as dt

    variables = ['Latitude', 'Longitude', 'Scan_Start_Time']
    logging.info("Listing coordinates: " + str(variables))

    sdata, _vdata = hdf.read(filenames, variables)

    apply_interpolation = False
    if variable is not None:
        scale = self.__get_data_scale(filenames[0], variable)
        # Compare by value: ``is`` tests object identity, which is not guaranteed for equal strings
        apply_interpolation = scale == "1km"

    lat = sdata['Latitude']
    sd_lat = hdf.read_data(lat, _get_MODIS_SDS_data)
    lat_data = self.__field_interpolate(sd_lat) if apply_interpolation else sd_lat
    lat_metadata = hdf.read_metadata(lat, "SD")
    lat_coord = Coord(lat_data, lat_metadata, 'Y')

    lon = sdata['Longitude']
    if apply_interpolation:
        lon_data = self.__field_interpolate(hdf.read_data(lon, _get_MODIS_SDS_data))
    else:
        lon_data = hdf.read_data(lon, _get_MODIS_SDS_data)
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    time = sdata['Scan_Start_Time']
    time_metadata = hdf.read_metadata(time, "SD")
    # Ensure the standard name is set
    time_metadata.standard_name = 'time'
    # The raw dataset handles are passed with a retrieval callback instead of being read here
    time_coord = Coord(time, time_metadata, "T", _get_MODIS_SDS_data)
    time_coord.convert_TAI_time_to_std_time(dt.datetime(1993, 1, 1, 0, 0, 0))

    return CoordList([lat_coord, lon_coord, time_coord])
class UngriddedCoordinates(CommonData):
    """
    Wrapper (adaptor) class holding only the coordinates of ungridded data, without any data values.
    """

    def __init__(self, coords):
        """
        Constructor

        Points for which any coordinate is masked or NaN are removed from all coordinates.

        :param coords: A list of the associated Coord objects, a CoordList or a single Coord
        :raise ValueError: If ``coords`` is none of the accepted types
        """
        from cis.data_io.Coord import CoordList, Coord

        if isinstance(coords, list):
            coord_list = CoordList(coords)
        elif isinstance(coords, CoordList):
            coord_list = coords
        elif isinstance(coords, Coord):
            coord_list = CoordList([coords])
        else:
            raise ValueError("Invalid Coords type")
        self._coords = coord_list

        self._post_process()

        flattened = []
        for standard_coord in self._coords.find_standard_coords():
            flattened.append(standard_coord.data_flattened if standard_coord is not None else None)
        self.coords_flattened = flattened

    def _post_process(self):
        """
        Perform a post processing step on lazy loaded Coordinate Data

        :return:
        """
        # Collect every point which is masked or NaN in at least one coordinate
        missing = numpy.zeros(self._coords[0].data_flattened.shape, dtype=bool)
        for coord in self._coords:
            missing |= numpy.ma.getmaskarray(coord.data_flattened)
            if coord.data.dtype != 'object':
                missing |= numpy.isnan(coord.data).flatten()

        if not missing.any():
            return

        n_points = numpy.count_nonzero(missing)
        logging.warning("Identified {n_points} point(s) which were missing values for some or all coordinates - "
                        "these points have been removed from the data.".format(n_points=n_points))
        # Remove those points from all the coordinates
        for coord in self._coords:
            coord.data = numpy.ma.masked_array(coord.data_flattened, mask=missing).compressed()
            coord.update_shape()
            coord.update_range()

    @property
    def history(self):
        return "UngriddedCoordinates have no history"

    @property
    def x(self):
        return self.coord(axis='X')

    @property
    def y(self):
        return self.coord(axis='Y')

    @property
    def lat(self):
        return self.coord(standard_name='latitude')

    @property
    def lon(self):
        return self.coord(standard_name='longitude')

    @property
    def time(self):
        return self.coord(standard_name='time')

    def hyper_point(self, index):
        """
        :param index: The index in the array to find the point for
        :return: A hyperpoint representing the data at that point
        """
        from cis.data_io.hyperpoint import HyperPoint

        point_values = []
        for standard_name in ('latitude', 'longitude', 'altitude', 'time', 'air_pressure'):
            point_values.append(self.coord(standard_name=standard_name).data.flat[index])
        # There is no data value for pure coordinates
        point_values.append(None)
        return HyperPoint(*point_values)

    def as_data_frame(self, copy=True):
        """
        Convert an UngriddedCoordinates object to a Pandas DataFrame.

        :param copy: Create a copy of the data for the new DataFrame? Default is True.
        :return: A Pandas DataFrame representing the data and coordinates. Note that this won't include any metadata.
        """
        return _coords_as_data_frame(self._coords)

    def coords(self, name_or_coord=None, standard_name=None, long_name=None, attributes=None, axis=None,
               dim_coords=True):
        """
        :return: A list of coordinates in this object fitting the given criteria
        """
        matching = self._coords.get_coords(name_or_coord, standard_name, long_name, attributes, axis)
        return matching

    def coord(self, name_or_coord=None, standard_name=None, long_name=None, attributes=None, axis=None):
        """
        :raise: CoordinateNotFoundError
        :return: A single coord given the same arguments as :meth:`coords`.
        """
        match = self._coords.get_coord(name_or_coord, standard_name, long_name, attributes, axis)
        return match

    def get_coordinates_points(self):
        return UngriddedHyperPointView(self.coords_flattened, None)

    def get_all_points(self):
        """Returns a HyperPointView of the points.

        :return: HyperPointView of all the data points
        """
        return UngriddedHyperPointView(self.coords_flattened, None)

    def get_non_masked_points(self):
        """Returns a HyperPointView for which the default iterator omits masked points.

        :return: HyperPointView of the data points
        """
        return UngriddedHyperPointView(self.coords_flattened, None, non_masked_iteration=True)

    @property
    def is_gridded(self):
        """Returns value indicating whether the data/coordinates are gridded.
        """
        return False
class UngriddedData(LazyData, CommonData):
    """
    Wrapper (adaptor) class for the different types of possible ungridded data.
    """

    def __init__(self, data, metadata, coords, data_retrieval_callback=None):
        """
        Constructor

        :param data: The data handler (e.g. SDS instance) for the specific data type, or a numpy array of data.
          This can be a list of data handlers, or a single data handler
        :param metadata: Any associated metadata
        :param coords: A list of the associated Coord objects, a CoordList or a single Coord
        :param data_retrieval_callback: A method for retrieving data when needed
        :raise ValueError: If ``coords`` is none of the accepted types
        """
        from cis.data_io.Coord import CoordList, Coord

        if isinstance(coords, list):
            self._coords = CoordList(coords)
        elif isinstance(coords, CoordList):
            self._coords = coords
        elif isinstance(coords, Coord):
            self._coords = CoordList([coords])
        else:
            raise ValueError("Invalid Coords type")

        # TODO Find a cleaner workaround for this, for some reason UDUNITS can not parse 'per kilometer per steradian'
        if str(metadata.units) == 'per kilometer per steradian':
            metadata.units = 'kilometer^-1 steradian^-1'

        super(UngriddedData, self).__init__(data, metadata, data_retrieval_callback)

    @property
    def coords_flattened(self):
        all_coords = self.coords().find_standard_coords()
        return [(c.data_flattened if c is not None else None) for c in all_coords]

    def _post_process(self):
        """
        Perform a post processing step on lazy loaded Ungridded Data.

        Points for which any coordinate is masked or NaN are removed from the coordinates and from the data.

        :return:
        """
        # Load the data if not already loaded
        if self._data is None:
            _ = self.data
        else:
            # Remove any points with missing coordinate values:
            combined_mask = numpy.zeros(self._data.shape, dtype=bool).flatten()
            for coord in self._coords:
                combined_mask |= numpy.ma.getmaskarray(coord.data).flatten()
                if coord.data.dtype != 'object':
                    combined_mask |= numpy.isnan(coord.data).flatten()
            if combined_mask.any():
                n_points = numpy.count_nonzero(combined_mask)
                logging.warning(
                    "Identified {n_points} point(s) which were missing values for some or all coordinates - "
                    "these points have been removed from the data.".format(n_points=n_points))
                for coord in self._coords:
                    coord.data = numpy.ma.masked_array(coord.data.flatten(), mask=combined_mask).compressed()
                    coord.update_shape()
                    coord.update_range()
                if numpy.ma.is_masked(self._data):
                    # Compress the data values and their own mask separately so that points which are
                    # masked in the data (but have valid coordinates) stay in place, still masked
                    new_data_mask = numpy.ma.masked_array(self._data.mask.flatten(), mask=combined_mask).compressed()
                    new_data = numpy.ma.masked_array(self._data.data.flatten(), mask=combined_mask).compressed()
                    self._data = numpy.ma.masked_array(new_data, mask=new_data_mask)
                else:
                    self._data = numpy.ma.masked_array(self._data.flatten(), mask=combined_mask).compressed()
            self.update_shape()
            self.update_range()

    def make_new_with_same_coordinates(self, data=None, var_name=None, standard_name=None,
                                       long_name=None, history=None, units=None, flatten=False):
        """
        Create a new, empty UngriddedData object with the same coordinates as this one.

        :param data: Data to use (if None then defaults to all zeros)
        :param var_name: Variable name
        :param standard_name: Variable CF standard name
        :param long_name: Variable long name
        :param history: Data history string
        :param units: Variable units
        :param flatten: Whether to flatten the data and coordinates (for ungridded data only)
        :return: UngriddedData instance
        """
        if data is None:
            data = numpy.zeros(self.shape)
        metadata = Metadata(name=var_name, standard_name=standard_name,
                            long_name=long_name, history='', units=units)
        if flatten:
            from cis.data_io.Coord import Coord
            data = data.flatten()
            new_coords = [Coord(coord.data_flattened, coord.metadata, coord.axis) for coord in self._coords]
        else:
            new_coords = self._coords
        ug_data = UngriddedData(data=data, metadata=metadata, coords=new_coords)
        # Copy the history in separately so it gets the timestamp added.
        if history:
            ug_data.add_history(history)
        return ug_data

    def copy(self):
        """
        Create a copy of this UngriddedData object with new data and coordinates
        so that that they can be modified without held references being affected.
        Will call any lazy loading methods in the data and coordinates

        :return: Copied UngriddedData object
        """
        data = numpy.ma.copy(self.data)  # This will load the data if lazy load
        coords = self.coords().copy()
        return UngriddedData(data=data, metadata=self.metadata, coords=coords)

    @property
    def history(self):
        return self.metadata.history

    @property
    def x(self):
        return self.coord(axis='X')

    @property
    def y(self):
        return self.coord(axis='Y')

    @property
    def lat(self):
        return self.coord(standard_name='latitude')

    @property
    def lon(self):
        return self.coord(standard_name='longitude')

    @property
    def time(self):
        return self.coord(standard_name='time')

    def hyper_point(self, index):
        """
        :param index: The index in the array to find the point for
        :return: A hyperpoint representing the data at that point
        """
        from cis.data_io.hyperpoint import HyperPoint
        return HyperPoint(self.coord(standard_name='latitude').data.flat[index],
                          self.coord(standard_name='longitude').data.flat[index],
                          self.coord(standard_name='altitude').data.flat[index],
                          self.coord(standard_name='time').data.flat[index],
                          self.coord(standard_name='air_pressure').data.flat[index],
                          self.data.flat[index])

    def as_data_frame(self, copy=True):
        """
        Convert an UngriddedData object to a Pandas DataFrame.

        :param copy: Create a copy of the data for the new DataFrame? Default is True.
        :return: A Pandas DataFrame representing the data and coordinates. Note that this won't include any metadata.
        """
        df = _coords_as_data_frame(self.coords())
        try:
            df[self.name()] = _to_flat_ndarray(self.data, copy)
        except ValueError:
            # logging.warn is a deprecated alias which was removed in Python 3.13
            logging.warning("Copy created of MaskedArray for {} when creating Pandas DataFrame".format(self.name()))
            df[self.name()] = _to_flat_ndarray(self.data, True)

        return df

    def coords(self, name_or_coord=None, standard_name=None, long_name=None, attributes=None, axis=None,
               dim_coords=True):
        """
        :return: A list of coordinates in this UngriddedData object fitting the given criteria
        """
        # Post-process first so that the coordinates handed out have had missing points removed
        self._post_process()
        return self._coords.get_coords(name_or_coord, standard_name, long_name, attributes, axis)

    def coord(self, name_or_coord=None, standard_name=None, long_name=None, attributes=None, axis=None):
        """
        :raise: CoordinateNotFoundError
        :return: A single coord given the same arguments as :meth:`coords`.
        """
        return self.coords().get_coord(name_or_coord, standard_name, long_name, attributes, axis)

    def get_coordinates_points(self):
        """Returns a HyperPointView of the coordinates of points.

        :return: HyperPointView of the coordinates of points
        """
        return UngriddedHyperPointView(self.coords_flattened, None)

    def get_all_points(self):
        """Returns a HyperPointView of the points.

        :return: HyperPointView of all the data points
        """
        return UngriddedHyperPointView(self.coords_flattened, self.data_flattened)

    def get_non_masked_points(self):
        """Returns a HyperPointView for which the default iterator omits masked points.

        :return: HyperPointView of the data points
        """
        return UngriddedHyperPointView(self.coords_flattened, self.data_flattened, non_masked_iteration=True)

    def find_standard_coords(self):
        """Constructs a list of the standard coordinates.
        The standard coordinates are latitude, longitude, altitude, air_pressure and time; they occur in the return
        list in this order.

        :return: list of coordinates or None if coordinate not present
        """
        return self.coords().find_standard_coords()

    @property
    def is_gridded(self):
        """Returns value indicating whether the data/coordinates are gridded.
        """
        return False

    @classmethod
    def from_points_array(cls, hyperpoints):
        """
        Constuctor for building an UngriddedData object from a list of hyper points

        :param hyperpoints: list of HyperPoints
        :return: A new instance holding the values of the points and one coordinate for every component
          which is not None
        """
        from cis.data_io.Coord import Coord, CoordList
        from cis.data_io.hyperpoint import HyperPointList

        if not isinstance(hyperpoints, HyperPointList):
            hyperpoints = HyperPointList(hyperpoints)

        values = hyperpoints.vals
        latitude = hyperpoints.latitudes
        longitude = hyperpoints.longitudes
        air_pressure = hyperpoints.air_pressures
        altitude = hyperpoints.altitudes
        time = hyperpoints.times

        coord_list = []
        if latitude is not None:
            coord_list.append(Coord(latitude, Metadata(standard_name='latitude', units='degrees north')))
        if longitude is not None:
            coord_list.append(Coord(longitude, Metadata(standard_name='longitude', units='degrees east')))
        if air_pressure is not None:
            coord_list.append(Coord(air_pressure, Metadata(standard_name='air_pressure', units='Pa')))
        if altitude is not None:
            coord_list.append(Coord(altitude, Metadata(standard_name='altitude', units='meters')))
        if time is not None:
            coord_list.append(Coord(time, Metadata(standard_name='time', units='seconds')))
        coords = CoordList(coord_list)

        return cls(values, Metadata(), coords)

    def summary(self):
        """
        Unicode summary of the UngriddedData with metadata of itself and its coordinates
        """
        summary = 'Ungridded data: {name} / ({units}) \n'.format(name=self.name(), units=self.units)
        summary += ' Shape = {}\n'.format(self.data.shape) + '\n'
        summary += ' Total number of points = {}\n'.format(self.data.size)
        # Plain arrays have no count(), in which case every point is unmasked
        num_non_masked_points = self.data.count() if hasattr(self.data, 'count') else self.data.size
        summary += ' Number of non-masked points = {}\n'.format(num_non_masked_points)

        summary += str(self.metadata)

        summary += ' Coordinates: \n'
        for c in self.coords():
            summary += '{pad:{width}}{name}\n'.format(pad=' ', width=7, name=c.name())
            c.update_range()
            summary += c.metadata.summary(offset=10)

        return summary

    def __str__(self):
        import sys
        text = self.summary()
        if sys.version_info[0] >= 3:
            # On Python 3 __str__ has to return str; returning the encoded bytes raises a TypeError
            return text
        return text.encode(errors='replace')

    def __unicode__(self):
        return self.summary()
def _create_coord_list(self, filenames, index_offset=0):
    """
    Build the coordinate list (latitude, longitude, time, altitude and optionally pressure)
    for the given HDF files.

    :param filenames: files to read. ``Latitude``, ``Longitude``, ``Profile_Time`` and ``Pressure``
        are read from every file; the altitudes are taken from the first file only.
    :param index_offset: column selected from the 2D latitude, longitude and time arrays before
        each is repeated once per altitude level
    :return: CoordList containing latitude, longitude, time and altitude coordinates, followed by
        pressure when ``self.include_pressure`` is set and the pressure array has the same shape
        as the expanded altitude array
    :raises IOError: if reading the datasets of a file raises an HDF4Error
    """
    import datetime as dt
    import logging

    from cis.data_io import hdf as hdf
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import Metadata
    import cis.utils as utils
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

    coord_vars = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
    logging.info("Listing coordinates: " + str(coord_vars))

    # Collect the datasets of every file, grouped per variable name.
    sdata = {}
    for path in filenames:
        try:
            file_sds = hdf_sd.read(path, coord_vars)
        except HDF4Error as err:
            raise IOError(str(err))
        for var_name in file_sds:
            utils.add_element_to_list_in_dict(sdata, var_name, file_sds[var_name])

    alt_name = "altitude"
    logging.info("Additional coordinates: '" + alt_name + "'")

    # The sizes of the data arrays are worked out here; the coordinate variables are
    # reshaped to match them.
    # NOTE: This assumes that all Caliop_L1 files share the same altitudes. If that is not
    # the case, the altitudes must be concatenated from all files instead of being taken
    # arbitrarily from the first one.
    altitudes = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
    altitudes *= 1000.0  # Convert to m
    len_x = altitudes.shape[0]

    latitudes = hdf.read_data(sdata['Latitude'], self._get_calipso_data)
    len_y = latitudes.shape[0]
    # NOTE(review): the metadata shape is recorded as (len_x, len_y) while the arrays below are
    # expanded with length len_y on axis 0 / len_x on axis 1 - confirm these agree.
    new_shape = (len_x, len_y)

    def _per_altitude(values):
        # Pick the requested column and repeat it len_x times along axis 1.
        return utils.expand_1d_to_2d_array(values[:, index_offset], len_x, axis=1)

    def _sd_metadata(sds):
        # Metadata as stored in the file, with the shape overridden to the common one.
        meta = hdf.read_metadata(sds, "SD")
        meta.shape = new_shape
        return meta

    # altitude
    altitudes = utils.expand_1d_to_2d_array(altitudes, len_y, axis=0)
    alt_coord = Coord(altitudes, Metadata(name=alt_name, standard_name=alt_name, shape=new_shape))

    # pressure
    if self.include_pressure:
        pres_sds = sdata['Pressure']
        pres_data = hdf.read_data(pres_sds, self._get_calipso_data)
        pres_metadata = hdf.read_metadata(pres_sds, "SD")
        # Badly formatted units are not CF compliant and break when aggregated, so fix them.
        if str(pres_metadata.units) == "hPA":
            pres_metadata.units = "hPa"
        pres_metadata.shape = new_shape
        pres_coord = Coord(pres_data, pres_metadata, 'P')

    # latitude
    lat_coord = Coord(_per_altitude(latitudes), _sd_metadata(sdata['Latitude']), 'Y')

    # longitude
    lon_sds = sdata['Longitude']
    lon_values = hdf.read_data(lon_sds, self._get_calipso_data)
    lon_coord = Coord(_per_altitude(lon_values), _sd_metadata(lon_sds), 'X')

    # profile time: converted to the standard time unit relative to 1993-01-01 00:00:00
    raw_time = hdf.read_data(sdata['Profile_Time'], self._get_calipso_data)
    std_time = convert_sec_since_to_std_time(raw_time, dt.datetime(1993, 1, 1, 0, 0, 0))
    std_time = _per_altitude(std_time)
    time_metadata = Metadata(name='Profile_Time', standard_name='time', shape=std_time.shape,
                             units=cis_standard_time_unit)
    time_coord = Coord(std_time, time_metadata, "T")

    # Assemble the result; append() is used for each coordinate rather than passing a list
    # to the constructor.
    coords = CoordList()
    for coord in (lat_coord, lon_coord, time_coord, alt_coord):
        coords.append(coord)

    # Pressure is only attached when it is on the same grid as the expanded altitudes. Where it
    # is on a different (smaller) grid it is skipped; supporting that case would need the
    # pressure to be interpolated onto the altitude grid. (The original note names "MODIS L1"
    # as such a case.)
    if self.include_pressure and pres_data.shape == altitudes.shape:
        coords.append(pres_coord)

    return coords