def test_aggregating_over_time_with_default_times(self):
    """Aggregate over time giving only a 30-day step and check the single resulting cell."""
    from datetime import datetime, timedelta
    from cis.time_util import cis_standard_time_unit as tunit

    ungridded = make_regular_2d_with_time_ungridded_data()
    ungridded.time.convert_to_std_time()

    # Start and stop are left open; only the bin width is specified.
    bin_width = timedelta(days=30)
    output = ungridded.aggregate(t=slice(None, None, bin_width))

    lower = tunit.date2num(datetime(1984, 8, 27))
    upper = tunit.date2num(datetime(1984, 9, 10))
    assert_arrays_equal(output[0].coord('time').bounds, [[lower, upper]])
    assert_arrays_equal(output[0].data, [[[7.5]]])
def test_aggregating_over_time_with_partial_datetime(self):
    """Aggregate over time starting from a PartialDateTime (September 1984) with a 30-day step."""
    from cis.time_util import PartialDateTime, cis_standard_time_unit as tunit
    from datetime import datetime, timedelta

    ungridded = make_regular_2d_with_time_ungridded_data()
    ungridded.time.convert_to_std_time()

    september = PartialDateTime(1984, 9)
    output = ungridded.aggregate(t=[september, timedelta(days=30)])

    # The expected cell spans the whole calendar month.
    month_start = tunit.date2num(datetime(1984, 9, 1))
    month_end = tunit.date2num(datetime(1984, 10, 1))
    assert_arrays_almost_equal(output[0].coord('time').bounds, [[month_start, month_end]])
    assert_arrays_almost_equal(output[0].data, [[[10.5]]])
def test_aeronet_time_parsing(self):  # 1.8s (presumably the run time of this test)
    """Check that selected rows of the loaded Aeronet 'datetime' column hold the expected times."""
    from datetime import datetime
    from cis.time_util import cis_standard_time_unit as ct

    aeronet_data = load_aeronet(valid_aeronet_filename, [valid_aeronet_variable])

    # (row index, expected timestamp) pairs, checked in order.
    expectations = (
        (0, datetime(2003, 9, 25, 6, 47, 9)),
        (5, datetime(2003, 9, 25, 7, 10, 37)),
        (76, datetime(2003, 9, 27, 13, 28, 2)),
    )
    for row, expected in expectations:
        assert_almost_equal(aeronet_data['datetime'][row], ct.date2num(expected))
def test_aggregating_over_time_with_default_times(self):
    """A 30-day aggregation step with no explicit start/stop should yield one time cell."""
    from datetime import datetime, timedelta
    from cis.time_util import cis_standard_time_unit as tunit

    source = make_regular_2d_with_time_ungridded_data()
    source.time.convert_to_std_time()
    output = source.aggregate(t=slice(None, None, timedelta(days=30)))

    # Expected bounds of the single cell, converted to standard time numbers.
    edges = (datetime(1984, 8, 27), datetime(1984, 9, 10))
    expected_t_bounds = [[tunit.date2num(edge) for edge in edges]]

    assert_arrays_equal(output[0].coord('time').bounds, expected_t_bounds)
    assert_arrays_equal(output[0].data, [[[7.5]]])
def test_aggregating_over_time_with_partial_datetime(self):
    """Time aggregation driven by a PartialDateTime start and a 30-day timedelta."""
    from cis.time_util import PartialDateTime, cis_standard_time_unit as tunit
    from datetime import datetime, timedelta

    source = make_regular_2d_with_time_ungridded_data()
    source.time.convert_to_std_time()

    time_spec = [PartialDateTime(1984, 9), timedelta(days=30)]
    output = source.aggregate(t=time_spec)

    # Expected bounds run from the first of September to the first of October.
    edges = (datetime(1984, 9, 1), datetime(1984, 10, 1))
    expected_t_bounds = [[tunit.date2num(edge) for edge in edges]]

    assert_arrays_almost_equal(output[0].coord('time').bounds, expected_t_bounds)
    assert_arrays_almost_equal(output[0].data, [[[10.5]]])
def test_pressure_constraint_in_4d(self):
    """Constrain 4D ungridded points by pressure separation only (p_sep=2)."""
    from cis.collocation.col_implementations import SepConstraintKdtree
    import datetime as dt
    import numpy as np

    ug_data = mock.make_regular_4d_ungridded_data()
    ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    sample_time = cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))
    sample_fields = {
        'longitude': [0.0],
        'latitude': [0.0],
        'altitude': [50.0],
        'air_pressure': [24.0],
        'time': [sample_time],
    }
    sample_point = pd.Series(sample_fields)

    constraint = SepConstraintKdtree(p_sep=2)

    # Twenty values are expected to survive: 6.0 through 25.0 inclusive.
    ref_vals = np.arange(6.0, 26.0)

    new_points = constraint.constrain_points(sample_point, ug_data_points)
    new_vals = new_points.vals

    eq_(ref_vals.size, new_vals.size)
    matches = np.equal(ref_vals, new_vals)
    assert matches.all()
def test_time_constraint_in_4d(self):
    """Constrain 4D ungridded points by time separation only."""
    from cis.collocation.col_implementations import SepConstraintKdtree
    import datetime as dt
    import numpy as np

    ug_data = mock.make_regular_4d_ungridded_data()
    ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    sample_time = cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))
    sample_fields = {
        'longitude': [0.0],
        'latitude': [0.0],
        'altitude': [50.0],
        'time': [sample_time],
    }
    sample_point = pd.Series(sample_fields)

    # One day plus one minute of allowed time separation.
    constraint = SepConstraintKdtree(t_sep='P1dT1M')

    # Thirty values expected: columns 1..3 of the values 1..50 laid out as 10 rows of 5.
    ref_vals = (np.arange(50) + 1.0).reshape((10, 5))[:, 1:4].flatten()

    new_points = constraint.constrain_points(sample_point, ug_data_points)
    new_vals = new_points.vals

    eq_(ref_vals.size, new_vals.size)
    matches = np.equal(ref_vals, new_vals)
    assert matches.all()
def test_alt_constraint_in_4d(self):
    """Constrain 4D ungridded points by altitude separation only."""
    from cis.collocation.col_implementations import SepConstraintKdtree
    import datetime as dt
    import numpy as np

    ug_data = mock.make_regular_4d_ungridded_data()
    ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    sample_time = cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))
    sample_fields = {
        'longitude': [0.0],
        'latitude': [0.0],
        'altitude': [50.0],
        'time': [sample_time],
    }
    sample_point = pd.Series(sample_fields)

    # Altitude separation of 15 around the sample altitude of 50.
    constraint = SepConstraintKdtree(a_sep=15)

    # Fifteen values are expected to survive: 21.0 through 35.0 inclusive.
    ref_vals = np.arange(21.0, 36.0)

    new_points = constraint.constrain_points(sample_point, ug_data_points)
    new_vals = new_points.vals

    eq_(ref_vals.size, new_vals.size)
    matches = np.equal(ref_vals, new_vals)
    assert matches.all()
def test_all_constraints_in_4d(self):
    """Apply horizontal, altitude, pressure and time separation constraints together."""
    ug_data = mock.make_regular_4d_ungridded_data()
    ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    sample_frame = pd.DataFrame(data={
        'longitude': [0.0],
        'latitude': [0.0],
        'altitude': [50.0],
        'air_pressure': [50.0],
        'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))],
    })
    sample_point = sample_frame.iloc[0]

    separations = {
        # Horizontal separation (presumably in km - TODO confirm against SepConstraintKdtree).
        'h_sep': 1000,
        # Altitude separation of 15 around the sample altitude of 50.
        'a_sep': 15,
        # 1.22 lies between 60/50 (1.2) and 50/40 (1.25); presumably a pressure ratio - TODO confirm.
        'p_sep': 1.22,
        # One day plus one minute of time separation.
        't_sep': 'P1dT1M',
    }
    constraint = SepConstraintKdtree(**separations)

    # The horizontal constraint needs a pre-built haversine KD-tree index over the data.
    index = HaversineDistanceKDTreeIndex()
    index.index_data(None, ug_data_points, None)
    constraint.haversine_distance_kd_tree_index = index

    # Six values are expected to survive all constraints: 27-29 and 32-34.
    ref_vals = np.array([27., 28., 29., 32., 33., 34.])

    new_points = constraint.constrain_points(sample_point, ug_data_points)
    new_vals = np.sort(new_points.vals)

    eq_(ref_vals.size, new_vals.size)
    matches = np.equal(ref_vals, new_vals)
    assert matches.all()
def parse_as_number_or_standard_time(string):
    """
    Parse a command-line string as a number; if the parse yields a datetime instead,
    convert it to CIS standard time units.

    :param string: String to parse
    :return: Whatever parse_as_number_or_datetime returned, except that a datetime result
     is replaced by its numeric value in CIS standard time units
    """
    from datetime import datetime

    parsed = parse_as_number_or_datetime(string)
    if not isinstance(parsed, datetime):
        return parsed
    return cis_standard_time_unit.date2num(parsed)
def load_aeronet(filename, variables=None):
    """
    Load an Aeronet csv file into masked arrays keyed by variable name.

    The file's "date" and "time" columns are combined into one "datetime" entry holding
    CIS standard time numbers.

    :param filename: data file name
    :param variables: A list of variables to return in addition to the geolocation columns
    :return: A dictionary of variable names and masked arrays containing the data for that variable.
     An empty dictionary is returned when the file lists no variables; a dictionary of empty
     lists for the geolocation keys is returned when the file has no data rows.
    :raises InvalidVariableError: if read_csv raises a ValueError (e.g. a requested variable is absent)
    """
    from cis.exceptions import InvalidVariableError
    from cis.time_util import cis_standard_time_unit
    from numpy.ma import masked_invalid
    from pandas import read_csv, to_datetime

    version = get_aeronet_version(filename)
    ordered_vars = get_aeronet_file_variables(filename, version)
    if len(ordered_vars) == 0:
        return {}

    # Always load whatever geolocation columns the file provides, then any requested variables
    geolocation = ("date", "time", "latitude", "longitude", "altitude")
    cols = [var for var in geolocation if var in ordered_vars]
    if variables is not None:
        cols.extend(variables)

    # Date and time stay as strings so they can be parsed together; everything else is numeric
    dtypes = {}
    for var in cols:
        dtypes[var] = 'str' if var in ("date", "time") else "float"

    try:
        rawd = read_csv(filename, sep=",", header=AERONET_HEADER_LENGTH[version]-1, names=ordered_vars,
                        index_col=False, usecols=cols, na_values=AERONET_MISSING_VALUE[version], dtype=dtypes,
                        parse_dates={"datetime":["date", "time"]}, infer_datetime_format=True, dayfirst=True,
                        error_bad_lines=False, warn_bad_lines=True,
                        )
    except ValueError:
        raise InvalidVariableError("{} not available in {}".format(variables, filename))

    # No data rows at all
    if rawd.shape[0] == 0:
        return {"datetime":[], "latitude":[], "longitude":[], "altitude":[]}

    # Turn the pandas Timestamps into CIS standard time numbers
    timestamps = to_datetime(rawd["datetime"], format='%d:%m:%Y %H:%M:%S')
    rawd["datetime"] = [cis_standard_time_unit.date2num(stamp.to_pydatetime()) for stamp in timestamps]

    # Some formats do not carry the position on every line, so fill it in here
    if version.startswith("MAN"):
        rawd["altitude"] = 0.
    elif version.endswith("2"):
        site_fields = get_file_metadata(filename).misc[2]
        # Entries 1..3 are "key=value" strings for longitude, latitude and altitude respectively
        for position, key in enumerate(("longitude", "latitude", "altitude"), start=1):
            rawd[key] = float(site_fields[position].split("=")[1])

    return {name: masked_invalid(column) for name, column in rawd.items()}
def make_regular_2d_with_time_ungridded_data():
    """
    Build a well defined ungridded data object of shape 5x3 whose data is

    array([[1,2,3],
           [4,5,6],
           [7,8,9],
           [10,11,12],
           [13,14,15]])

    with coordinates

    latitude:  constant along each row, running -10, -5, 0, 5, 10 down the rows
    longitude: -5, 0, 5 across every row
    time:      15 consecutive days starting on 27th August 1984, filled row by row

    The two spatial axes have different lengths to make them easy to tell apart. Latitude increases
    with the row index, i.e. downwards as the array is written above.
    """
    import numpy as np
    from cis.data_io.Coord import CoordList, Coord
    from cis.data_io.ungridded_data import UngriddedData, Metadata
    import datetime
    from cis.time_util import cis_standard_time_unit

    lat_points = np.arange(-10, 11, 5)
    lon_points = np.arange(-5, 6, 5)
    lon_grid, lat_grid = np.meshgrid(lon_points, lat_points)

    start = datetime.datetime(1984, 8, 27)
    daily = [start + datetime.timedelta(days=offset) for offset in range(15)]
    times = np.array(daily).reshape((5, 3))

    lat = Coord(lat_grid, Metadata(standard_name='latitude', units='degrees'))
    lon = Coord(lon_grid, Metadata(standard_name='longitude', units='degrees'))
    time = Coord(cis_standard_time_unit.date2num(times),
                 Metadata(standard_name='time', units=cis_standard_time_unit))

    values = (np.arange(15) + 1.0).reshape((5, 3))

    rain_metadata = Metadata(name='rain', standard_name='rainfall_flux',
                             long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                             units="kg m-2 s-1", missing_value=-999)
    return UngriddedData(values, rain_metadata, CoordList([lat, lon, time]))
def test_all_constraints_in_4d(self):
    """Combine all four separation constraints and check which data values remain."""
    ug_data = mock.make_regular_4d_ungridded_data()
    ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    sample_time = cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))
    sample_point = pd.DataFrame(data={'longitude': [0.0],
                                      'latitude': [0.0],
                                      'altitude': [50.0],
                                      'air_pressure': [50.0],
                                      'time': [sample_time]}).iloc[0]

    constraint = SepConstraintKdtree(
        h_sep=1000,        # horizontal separation (units not shown here - presumably km, TODO confirm)
        a_sep=15,          # altitude separation
        p_sep=1.22,        # between 60/50 = 1.2 and 50/40 = 1.25
        t_sep='P1dT1M',    # one day and one minute
    )

    # Build the haversine KD-tree index over the data and hand it to the constraint.
    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(None, ug_data_points, None)
    constraint.haversine_distance_kd_tree_index = kd_index

    # Expected survivors after sorting: 27, 28, 29, 32, 33, 34 (six values).
    ref_vals = np.array([27., 28., 29., 32., 33., 34.])

    constrained = constraint.constrain_points(sample_point, ug_data_points)
    new_vals = np.sort(constrained.vals)

    eq_(ref_vals.size, new_vals.size)
    assert np.equal(ref_vals, new_vals).all()
def test_time_constraint_in_4d(self):
    """Only the time separation constraint is applied to the 4D ungridded points."""
    from cis.collocation.col_implementations import SepConstraintKdtree
    import datetime as dt
    import numpy as np

    ug_data = mock.make_regular_4d_ungridded_data()
    ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    sample_point = pd.Series({
        'longitude': [0.0],
        'latitude': [0.0],
        'altitude': [50.0],
        'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))],
    })

    # Time separation of one day and one minute
    t_sep = 'P1dT1M'
    constraint = SepConstraintKdtree(t_sep=t_sep)

    # Values 1..50 arranged as 10 rows of 5; the middle three columns (30 values) should remain
    value_grid = np.reshape(np.arange(50) + 1.0, (10, 5))
    ref_vals = value_grid[:, 1:4].flatten()

    constrained = constraint.constrain_points(sample_point, ug_data_points)
    new_vals = constrained.vals

    eq_(ref_vals.size, new_vals.size)
    assert np.equal(ref_vals, new_vals).all()
def make_dummy_ungridded_data_time_series(len=10):
    """
    Create a time series of ungridded data of length len, located at a single
    lat/lon coordinate (65.2, -12.1), with one sample per day from 27th August 1984.

    :param len: length of the time series and associated data
    :return: An UngriddedData object whose data values run from 1.0 to len
    """
    from datetime import datetime, timedelta
    from cis.time_util import cis_standard_time_unit
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedData, Metadata

    # The parameter name shadows the builtin; use a local alias for readability.
    n_points = len

    start = datetime(1984, 8, 27)
    times = np.array([start + timedelta(days=offset) for offset in range(n_points)])

    lat = Coord(np.full(n_points, 65.2), Metadata(standard_name='latitude', units='degrees'))
    lon = Coord(np.full(n_points, -12.1), Metadata(standard_name='longitude', units='degrees'))
    time = Coord(cis_standard_time_unit.date2num(times),
                 Metadata(standard_name='time', units=cis_standard_time_unit),
                 axis='x')

    values = np.arange(n_points) + 1.0
    rain_metadata = Metadata(standard_name='rainfall_flux',
                             long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                             units="kg m-2 s-1", missing_value=-999)
    return UngriddedData(values, rain_metadata, CoordList([lat, lon, time]))
def test_pressure_constraint_in_4d(self):
    """Only the pressure separation constraint (p_sep=2) is applied to the 4D ungridded points."""
    from cis.collocation.col_implementations import SepConstraintKdtree
    import datetime as dt
    import numpy as np

    ug_data = mock.make_regular_4d_ungridded_data()
    ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    sample_point = pd.Series({
        'longitude': [0.0],
        'latitude': [0.0],
        'altitude': [50.0],
        'air_pressure': [24.0],
        'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))],
    })

    p_sep = 2
    constraint = SepConstraintKdtree(p_sep=p_sep)

    # Four rows of five values should remain (20 points):
    #   6..10, 11..15, 16..20, 21..25
    ref_vals = np.array([float(value) for value in range(6, 26)])

    constrained = constraint.constrain_points(sample_point, ug_data_points)
    new_vals = constrained.vals

    eq_(ref_vals.size, new_vals.size)
    assert np.equal(ref_vals, new_vals).all()
def test_alt_constraint_in_4d(self):
    """Only the altitude separation constraint is applied to the 4D ungridded points."""
    from cis.collocation.col_implementations import SepConstraintKdtree
    import datetime as dt
    import numpy as np

    ug_data = mock.make_regular_4d_ungridded_data()
    ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    sample_point = pd.Series({
        'longitude': [0.0],
        'latitude': [0.0],
        'altitude': [50.0],
        'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))],
    })

    # Altitude separation of 15
    altitude_separation = 15
    constraint = SepConstraintKdtree(a_sep=altitude_separation)

    # Three rows of five values should remain (15 points)
    expected_rows = [
        [21., 22., 23., 24., 25.],
        [26., 27., 28., 29., 30.],
        [31., 32., 33., 34., 35.],
    ]
    ref_vals = np.array(expected_rows).flatten()

    constrained = constraint.constrain_points(sample_point, ug_data_points)
    new_vals = constrained.vals

    eq_(ref_vals.size, new_vals.size)
    assert np.equal(ref_vals, new_vals).all()
def parse_as_number_or_datetime(in_string, name, parser):
    """Parse a string as a number from the command line, or if that fails, as a datetime,
    reporting parse errors.

    The string is tried as an int, then as a float, and finally as a date/time string which
    is converted to the CIS standard time unit. The date/time string should be in an ISO 8601
    format except that the date and time parts may be separated by a space or colon instead of T.

    :param in_string: String to parse
    :param name: A description of the argument used for error messages
    :param parser: The parser used to report errors (its ``error`` method is called with a message)
    :return: int, or float value (possibly converted to the standard time from a time string);
     None if the string could not be parsed and ``parser.error`` returned
    """
    # Plain numbers first: int is tried before float so integral input keeps its type.
    for convert in (int, float):
        try:
            return convert(in_string)
        except ValueError:
            pass

    # Only the date branch needs dateutil, so it is imported here rather than up front.
    import dateutil.parser as du
    try:
        return cis_standard_time_unit.date2num(du.parse(in_string))
    except (ValueError, OverflowError):
        # dateutil documents OverflowError for dates too large for the platform; report it
        # through the parser like any other unparseable value instead of letting it escape.
        parser.error("'" + in_string + "' is not a valid " + name)
    return None
def create_data_object(self, filenames, variable, index_offset=1):
    """
    Read ``variable`` from each file into an Iris cube with a length-one time dimension and
    concatenate the cubes into a single GriddedData object.

    :param filenames: Files to read; the spatial coordinates are taken from the first file only
    :param variable: Name of the SDS variable to read
    :param index_offset: Not used by this implementation
    :return: A GriddedData object made from the concatenated cubes
    :raises IOError: if reading a file raises an HDF4Error
    :raises ValueError: if the variable's data is neither 2- nor 3-dimensional
    """
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube, CubeList
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit
    from datetime import datetime
    from iris.util import new_axis
    import numpy as np

    logging.debug("Creating data object for variable " + variable)

    variables = ["Pressure_Mean"]
    logging.info("Listing coordinates: " + str(variables))
    variables.append(variable)

    # Read the requested SDS objects from every file, grouped by variable name
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    # NOTE: the spatial coordinates are assumed to be identical in every file, so they are
    # read from the first file only. If that assumption does not hold, this needs to be
    # changed to combine the coordinates from all of the files.
    def first_file_midpoints(sds_name):
        return self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], sds_name))[0, :]

    alt_coord = DimCoord(first_file_midpoints('Altitude_Midpoint'), standard_name='altitude', units='km')
    alt_coord.convert_units('m')
    lat_coord = DimCoord(first_file_midpoints('Latitude_Midpoint'), standard_name='latitude',
                         units='degrees_north')
    lon_coord = DimCoord(first_file_midpoints('Longitude_Midpoint'), standard_name='longitude',
                         units='degrees_east')

    cubes = CubeList()
    for f in filenames:
        # The nominal year-month string (YYYYMM...) is pinned to the 15th of that month
        t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
        time_data = cis_standard_time_unit.date2num(datetime(int(t[0:4]), int(t[4:6]), 15))
        time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time',
                              units=cis_standard_time_unit)

        # Data and metadata for the requested variable
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")
        data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

        pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
        pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

        if data.ndim == 2:
            dim_coords = [(lat_coord, 0), (lon_coord, 1)]
        elif data.ndim == 3:
            dim_coords = [(lat_coord, 0), (lon_coord, 1), (alt_coord, 2)]
        else:
            raise ValueError("Unexpected number of dimensions for CALIOP data: {}".format(data.ndim))

        cube = Cube(data, long_name=metadata.long_name or variable,
                    units=self.clean_units(metadata.units),
                    dim_coords_and_dims=dim_coords,
                    aux_coords_and_dims=[(time_coord, ())])

        # Promote the scalar time coord to a length-one leading dimension
        new_cube = new_axis(cube, 'time')

        if data.ndim == 3:
            # Add the (extended) pressure coord so that it is explicitly a function of time
            new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))

        cubes.append(new_cube)

    # Join the per-file cubes into one GriddedData object
    return GriddedData.make_from_cube(cubes.concatenate_cube())
def parse_datetimestr_to_std_time(s):
    """
    Parse a date/time string with dateutil and convert it to the CIS standard time unit.

    :param s: Date/time string to parse
    :return: The parsed time expressed as a number in CIS standard time units
    """
    import dateutil.parser as du

    parsed = du.parse(s)
    return cis_standard_time_unit.date2num(parsed)
def create_data_object(self, filenames, variable):
    """
    Read ``variable`` from each netCDF file into an Iris cube on a latitude/longitude grid,
    tag each cube with a scalar time taken from the file name, and merge them.

    Invalid (NaN/Inf) and negative data values are masked. Each file's base name must start
    with a date formatted as ``YYYY_MM_DD``.

    :param filenames: Paths of the netCDF files to read
    :param variable: Name of the netCDF variable to read,
     e.g. 'NO2.COLUMN.VERTICAL.TROPOSPHERIC.CS30_BACKSCATTER.SOLAR'
    :return: The result of ``make_from_cube`` applied to the merged cube
    :raises ValueError: if a file name does not start with a ``YYYY_MM_DD`` date
    """
    from netCDF4 import Dataset
    from iris.cube import Cube, CubeList
    from iris.coords import DimCoord
    from datetime import datetime
    from os.path import basename
    from cis.time_util import cis_standard_time_unit
    from cis.data_io.gridded_data import make_from_cube
    import numpy as np

    cubes = CubeList()

    for f in filenames:
        ds = Dataset(f)
        # Everything that touches the open dataset sits inside try/finally so that the
        # file handle is released even if a variable or attribute is missing.
        try:
            v = ds.variables[variable]

            # Get the coords
            lat = ds.variables['LATITUDE']
            lon = ds.variables['LONGITUDE']

            # Load the data eagerly, masking invalid values (NaN, Inf, etc) ...
            a = np.ma.masked_invalid(v[:])
            # ... and then masking everything negative
            a = np.ma.masked_less(a, 0.0)

            # Just read the lat and lon in directly
            lat_coord = DimCoord(lat[:], standard_name='latitude', units='degrees',
                                 long_name=lat.VAR_DESCRIPTION)
            lon_coord = DimCoord(lon[:], standard_name='longitude', units='degrees',
                                 long_name=lon.VAR_DESCRIPTION)

            # Pull the date out of the filename
            fname = basename(f)
            dt = datetime.strptime(fname[:10], "%Y_%m_%d")
            t_coord = DimCoord(cis_standard_time_unit.date2num(dt), standard_name='time',
                               units=cis_standard_time_unit)

            c = Cube(a, long_name=getattr(v, "VAR_DESCRIPTION", None), units=getattr(v, "VAR_UNITS", None),
                     dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)])
            c.add_aux_coord(t_coord)
        finally:
            ds.close()

        cubes.append(c)

    # We have a scalar time coord and no conflicting metadata so this should just create one cube...
    merged = cubes.merge_cube()

    # Return as a CIS GriddedData object
    return make_from_cube(merged)
def create_data_object(self, filenames, variable, index_offset=1):
    """
    Build a GriddedData object for ``variable`` from a set of HDF files.

    One cube is created per file, each with a length-one leading time dimension, and the
    cubes are then concatenated. For 3-dimensional data the mean pressure is attached as an
    auxiliary coordinate spanning all four cube dimensions.

    :param filenames: Files to read; altitude, latitude and longitude come from the first file
    :param variable: Name of the SDS variable to read
    :param index_offset: Not used by this implementation
    :return: A GriddedData object made from the concatenated cubes
    :raises IOError: if reading a file raises an HDF4Error
    :raises ValueError: if the variable's data is neither 2- nor 3-dimensional
    """
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube, CubeList
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit
    from datetime import datetime
    from iris.util import new_axis
    import numpy as np

    logging.debug("Creating data object for variable " + variable)

    variables = ["Pressure_Mean"]
    logging.info("Listing coordinates: " + str(variables))
    variables.append(variable)

    # Collect the SDS objects of every file under their variable names
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))
        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    # NOTE: every file is assumed to share the same spatial coordinates, which are therefore
    # taken from the first file. If that is not the case this section must be changed to
    # combine the coordinates of all files.
    reference_file = filenames[0]

    alt_data = self._get_calipso_data(hdf_sd.HDF_SDS(reference_file, 'Altitude_Midpoint'))[0, :]
    alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
    alt_coord.convert_units('m')

    lat_data = self._get_calipso_data(hdf_sd.HDF_SDS(reference_file, 'Latitude_Midpoint'))[0, :]
    lat_coord = DimCoord(lat_data, standard_name='latitude', units='degrees_north')

    lon_data = self._get_calipso_data(hdf_sd.HDF_SDS(reference_file, 'Longitude_Midpoint'))[0, :]
    lon_coord = DimCoord(lon_data, standard_name='longitude', units='degrees_east')

    cubes = CubeList()
    for f in filenames:
        # Time is the 15th of the file's nominal year and month
        t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
        year, month = int(t[0:4]), int(t[4:6])
        time_data = cis_standard_time_unit.date2num(datetime(year, month, 15))
        time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time',
                              units=cis_standard_time_unit)

        # Retrieve the data and its metadata
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")
        data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

        pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
        pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

        has_altitude = data.ndim == 3
        if not has_altitude and data.ndim != 2:
            raise ValueError("Unexpected number of dimensions for CALIOP data: {}".format(data.ndim))

        dim_coords = [(lat_coord, 0), (lon_coord, 1)]
        if has_altitude:
            dim_coords.append((alt_coord, 2))

        cube = Cube(data,
                    long_name=metadata.long_name or variable,
                    units=self.clean_units(metadata.units),
                    dim_coords_and_dims=dim_coords,
                    aux_coords_and_dims=[(time_coord, ())])

        # Turn the scalar time coord into a length-one dimension
        new_cube = new_axis(cube, 'time')
        if has_altitude:
            # Attach the (extended) pressure coord so that it is explicitly a function of time
            new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))
        cubes.append(new_cube)

    # Concatenate the cubes from each file into a single GriddedData object
    gd = GriddedData.make_from_cube(cubes.concatenate_cube())
    return gd
def parse_as_number_or_datetime_can_parse_date_as_datetime():
    """A bare ISO date string should parse to that date's value in CIS standard time units."""
    from datetime import datetime
    from cis.time_util import cis_standard_time_unit

    parser = MockParser()
    expected = cis_standard_time_unit.date2num(datetime(2010, 7, 1))

    parsed = parse_as_number_or_datetime('2010-07-01', 'date/time arg', parser)
    assert parsed == expected