def test_GIVEN_ungridded_coords_with_time_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
    """Coordinates that include a time axis should convert to a usable data frame."""
    from datetime import datetime

    source_data = make_regular_2d_with_time_ungridded_data()
    frame = UngriddedCoordinates(source_data._coords).as_data_frame()

    # Spot-check individual latitude and longitude entries.
    assert_that(frame['latitude'][13] == 10)
    assert_that(frame['longitude'][0] == -5)
    # The frame must also be addressable with a datetime key.
    assert_that(frame['longitude'][datetime(1984, 8, 28)] == 0)
def setUp(self):
    """Create a 5x3 latitude/longitude fixture (15 points) shared by the tests."""
    lat_values = np.arange(-10, 11, 5)
    lon_values = np.arange(-5, 6, 5)
    lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)

    self.x = Coord(lat_grid, Metadata(standard_name='latitude', units='degrees'))
    self.y = Coord(lon_grid, Metadata(standard_name='longitude', units='degrees'))
    self.ug = UngriddedCoordinates(CoordList([self.x, self.y]))
def test_GIVEN_ungridded_coords_with_time_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(
        self):
    """Build coordinates from time-aware ungridded data and verify the resulting data frame."""
    from datetime import datetime

    coords_only = UngriddedCoordinates(make_regular_2d_with_time_ungridded_data()._coords)
    table = coords_only.as_data_frame()

    latitudes = table['latitude']
    longitudes = table['longitude']
    assert_that(latitudes[13] == 10)
    assert_that(longitudes[0] == -5)
    # Look-up using a datetime key.
    assert_that(longitudes[datetime(1984, 8, 28)] == 0)
def test_can_create_ungridded_coordinates(self):
    """Latitude and longitude should be reported first among the standard coordinates."""
    lat_values = np.arange(-10, 11, 5)
    lon_values = np.arange(-5, 6, 5)
    lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)

    lat = Coord(lat_grid, Metadata(standard_name='latitude', units='degrees'))
    lon = Coord(lon_grid, Metadata(standard_name='longitude', units='degrees'))
    ungridded = UngriddedCoordinates(CoordList([lat, lon]))

    found = ungridded.coords().find_standard_coords()
    # Only two of the five standard coordinate slots were supplied.
    assert found == [lat, lon, None, None, None]
def test_get_coordinates_points_returns_points(self):
    """Iterating the coordinate points of a 5x3 grid should yield 15 points."""
    lat_values = np.arange(-10, 11, 5)
    lon_values = np.arange(-5, 6, 5)
    lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)

    lat = Coord(lat_grid, Metadata(standard_name='latitude', units='degrees'))
    lon = Coord(lon_grid, Metadata(standard_name='longitude', units='degrees'))
    ungridded = UngriddedCoordinates(CoordList([lat, lon]))

    # Count by iteration rather than relying on len() support.
    point_count = sum(1 for _ in ungridded.get_coordinates_points())
    assert point_count == 15
def create_coords(self, filenames, variable=None):
    """
    Load the coordinates, and the data variable if one is requested, from the files.

    :param filenames: List of filenames to read coordinates from
    :param variable: Optional name of a variable to load as the data
    :return: UngriddedCoordinates when no variable is given, otherwise UngriddedData
    """
    data_variables, variable_selector = self._load_data(filenames, variable)
    dim_coords = self._create_coordinates_list(data_variables, variable_selector)

    if variable is None:
        return UngriddedCoordinates(dim_coords)

    aux_coord_name = variable_selector.find_auxiliary_coordinate(variable)
    if aux_coord_name is None:
        all_coords = dim_coords
    else:
        # The auxiliary coordinate is read from the first file and sized against the time coordinate.
        first_file = filenames[0]
        time_length = dim_coords.get_coord(standard_name='time').data.size
        all_coords = self._add_aux_coordinate(dim_coords, first_file, aux_coord_name, time_length)

    selected = data_variables[variable]
    return UngriddedData(selected, get_metadata(selected[0]), all_coords)
def test_GIVEN_missing_coord_values_WHEN_coords_THEN_missing_values_removed(self):
    """A point masked in one coordinate should be dropped from every coordinate."""
    lat_values = np.arange(-10, 11, 5)
    lon_values = np.arange(-5, 6, 5)
    lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)

    # Mask a single longitude point out of the 15 grid points.
    lon_masked = np.ma.masked_array(lon_grid, np.zeros(lon_grid.shape, dtype=bool))
    lon_masked.mask[1, 2] = True

    lat = Coord(lat_grid, Metadata(standard_name='latitude', units='degrees'))
    lon = Coord(lon_masked, Metadata(standard_name='longitude', units='degrees'))
    ungridded = UngriddedCoordinates(CoordList([lat, lon]))

    for coord in ungridded.coords():
        assert_that(len(coord.data), is_(14))
def create_coords(self, filenames, usr_variable=None):
    """
    Read the longitude and latitude coordinates, and optionally a data variable, from the files.

    :param filenames: List of filenames to read from
    :param usr_variable: Optional name of a data variable to load on top of the coordinates
    :return: UngriddedCoordinates when no variable is given, otherwise UngriddedData
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.exceptions import InvalidVariableError

    variables = [("longitude", "x"), ("latitude", "y")]
    logging.info("Listing coordinates: " + str(variables))

    var_data = read_many_files_individually(filenames, [name for name, _ in variables])

    coords = CoordList()
    for name, axis in variables:
        # Look each variable up by name: pairing var_data.values() with the list positionally
        # relies on dict iteration order and can attach the wrong axis to a variable.
        var = var_data[name]
        try:
            coords.append(Coord(var, get_metadata(var[0]), axis=axis))
        except InvalidVariableError:
            # Best effort: a coordinate that cannot be built is left out.
            pass

    # Note - no unit conversion is applied to the coordinates read here.
    if usr_variable is None:
        return UngriddedCoordinates(coords)

    usr_var_data = read_many_files_individually(filenames, usr_variable)[usr_variable]
    return UngriddedData(usr_var_data, get_metadata(usr_var_data[0]), coords)
def create_coords(self, filenames, variable=None):
    """
    Load whichever dimension coordinates are available and, if requested, a data variable.

    :param filenames: List of filenames to read coordinates from
    :param variable: Optional name of a variable to load as the data
    :return: UngriddedCoordinates when no variable is given, otherwise UngriddedData
    """
    from cis.data_io.netcdf import read_many_files_individually
    from cis.data_io.Coord import Coord, CoordList
    from cis.exceptions import InvalidVariableError

    name_and_axis = [("longitude", "x"), ("latitude", "y"), ("altitude", "z"), ("time", "t"),
                     ("air_pressure", "p")]

    dim_coords = CoordList()
    for name, axis in name_and_axis:
        try:
            var_data = read_many_files_individually(filenames, name)[name]
            dim_coords.append(Coord(var_data, get_metadata(var_data[0]), axis=axis))
        except InvalidVariableError:
            # Coordinates that raise InvalidVariableError are simply left out.
            continue

    if variable is None:
        return UngriddedCoordinates(dim_coords)

    # The 'DP_MID' auxiliary coordinate is read from the first file only.
    all_coords = self._add_aux_coordinate(dim_coords, filenames[0], 'DP_MID',
                                          dim_coords.get_coord(standard_name='time').data.size)
    usr_var_data = read_many_files_individually(filenames, variable)[variable]
    return UngriddedData(usr_var_data, get_metadata(usr_var_data[0]), all_coords)
def test_GIVEN_missing_coord_values_WHEN_coords_THEN_missing_values_removed(
        self):
    """Masking one longitude value must leave 14 of the 15 points in each coordinate."""
    lats_1d = np.arange(-10, 11, 5)
    lons_1d = np.arange(-5, 6, 5)
    lons_2d, lats_2d = np.meshgrid(lons_1d, lats_1d)

    lons_2d = np.ma.masked_array(lons_2d, np.zeros(lons_2d.shape, dtype=bool))
    lons_2d.mask[1, 2] = True  # the single missing point

    lat_coord = Coord(lats_2d, Metadata(standard_name='latitude', units='degrees'))
    lon_coord = Coord(lons_2d, Metadata(standard_name='longitude', units='degrees'))
    ug = UngriddedCoordinates(CoordList([lat_coord, lon_coord]))

    for coord in ug.coords():
        assert_that(len(coord.data), is_(14))
def create_coords(self, filenames, usr_variable=None):
    """
    Detect the coordinate variables present in the files and read them, plus an optional data variable.

    Variable names are matched case-insensitively against a fixed set of known coordinate names.
    When several variables map to the same standard name, only the first one found is used.

    :param filenames: List of filenames to read from; the first file is used to discover variables
    :param usr_variable: Optional name of a data variable to load on top of the coordinates
    :return: UngriddedCoordinates when no variable is given, otherwise UngriddedData
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata, get_netcdf_file_variables
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.exceptions import InvalidVariableError

    # Lower-cased variable name -> (axis, standard name). Keys must be lower case because
    # names are lower-cased before the look-up (the original 'G_ALT' test could never match).
    known_coords = {
        'longitude': ('x', 'longitude'),
        'latitude': ('y', 'latitude'),
        'g_alt': ('z', 'altitude'),
        'altitude': ('z', 'altitude'),
        'pressure_altitude': ('z', 'altitude'),
        'time': ('t', 'time'),
        'p': ('p', 'air_pressure'),
        'pressure': ('p', 'air_pressure'),
        'static_pressure': ('p', 'air_pressure'),
    }

    def get_axis_std_name(var):
        """Return the (axis, standard name) pair for a variable name, or None if it is not a coordinate."""
        return known_coords.get(var.lower())

    # We have to read it once first to find out which variables are in there. We assume the set of
    # coordinates in all the files are the same
    file_variables = get_netcdf_file_variables(filenames[0])

    # Keep the first variable found for each standard name and drop later duplicates.
    coord_variables = []
    seen_std_names = set()
    for v in file_variables:
        axis_std_name = get_axis_std_name(v)
        if axis_std_name is None or axis_std_name[1] in seen_std_names:
            continue
        seen_std_names.add(axis_std_name[1])
        coord_variables.append((v, axis_std_name))

    # Create a copy to contain all the variables to read
    all_variables = list(coord_variables)
    if usr_variable is not None:
        all_variables.append((usr_variable, ''))

    logging.info("Listing coordinates: " + str(all_variables))

    coords = CoordList()
    var_data = read_many_files_individually(filenames, [v[0] for v in all_variables])
    for name, axis_std_name in coord_variables:
        try:
            meta = get_metadata(var_data[name][0])
            # Fall back to the detected standard name when the file does not declare one.
            if meta.standard_name is None:
                meta.standard_name = axis_std_name[1]
            coords.append(Coord(var_data[name], meta, axis=axis_std_name[0]))
        except InvalidVariableError:
            pass

    # Note - We don't need to convert this time coord as it should have been written in our
    # 'standard' time unit
    if usr_variable is None:
        res = UngriddedCoordinates(coords)
    else:
        res = UngriddedData(var_data[usr_variable], get_metadata(var_data[usr_variable][0]), coords)

    return res
def test_order_of_coords_doesnt_matter(self):
    """Nearest-neighbour collocation should give the same data whatever the source dimension order."""
    from iris.cube import Cube
    from iris.coords import DimCoord
    import numpy as np
    from cis.data_io.gridded_data import make_from_cube
    from cis.data_io.ungridded_data import UngriddedCoordinates, Metadata
    from cis.data_io.Coord import Coord

    # Gridded source with dimensions (time, latitude, longitude, altitude).
    lat_dim = DimCoord(np.linspace(-90, 90, 18), standard_name='latitude', units='degrees')
    lon_dim = DimCoord(np.linspace(0, 359, 36), standard_name='longitude', units='degrees', circular=True)
    alt_dim = DimCoord(np.linspace(0, 10000, 10), standard_name='altitude', units='meters')
    time_dim = DimCoord(np.linspace(0, 30, 12), standard_name='time', units='days since 1970-01-01 00:00:00')

    values = np.arange(12 * 18 * 36 * 10).reshape(12, 18, 36, 10)
    cube = Cube(values, dim_coords_and_dims=[(time_dim, 0), (lat_dim, 1), (lon_dim, 2), (alt_dim, 3)])
    source = make_from_cube(cube)

    # Ungridded sample points.
    n = 10
    sample = UngriddedCoordinates([
        Coord(np.linspace(-9.1, 9.9, n), Metadata('latitude')),
        Coord(np.linspace(-9.1, 9.9, n), Metadata('longitude')),
        Coord(np.linspace(99, 599, n), Metadata('altitude')),
        Coord(np.linspace(0, 30, n), Metadata('time', units='days since 1970-01-01 00:00:00')),
    ])

    output = GriddedUngriddedCollocator().collocate(sample, source, None, 'nn')[0]

    # Transpose with the default ordering, then collocate again.
    source.transpose()
    after_default_transpose = GriddedUngriddedCollocator().collocate(sample, source, None, 'nn')[0].data
    assert_equal(after_default_transpose, output.data)

    # Transpose again with an explicit ordering.
    source.transpose((2, 1, 0, 3))
    after_explicit_transpose = GriddedUngriddedCollocator().collocate(sample, source, None, 'nn')[0].data
    assert_equal(after_explicit_transpose, output.data)
def create_coords(self, filenames):
    """
    Read the longitude, latitude and time coordinates from the given files.

    :param filenames: List of filenames to read coordinates from
    :return: UngriddedCoordinates holding latitude, longitude and time, in that order
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates

    var_data = read_many_files_individually(filenames, ["longitude", "latitude", "time"])

    def as_coord(name, axis):
        # Metadata is taken from the first entry read for the variable.
        values = var_data[name]
        return Coord(values, get_metadata(values[0]), axis=axis)

    lon = as_coord("longitude", "x")
    lat = as_coord("latitude", "y")
    time = as_coord("time", "t")

    return UngriddedCoordinates(CoordList([lat, lon, time]))
def create_coords(self, filenames, usr_variable=None):
    """
    Read the standard coordinates present in the files, plus an optional data variable.

    :param filenames: List of filenames to read from; the first file is used to discover variables
    :param usr_variable: Optional name of a data variable to load on top of the coordinates
    :return: UngriddedCoordinates when no variable is given, otherwise UngriddedData
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata, get_netcdf_file_variables
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.exceptions import InvalidVariableError

    axis_lookup = {"longitude": "x", 'latitude': 'y', 'altitude': 'z', 'time': 't', 'air_pressure': 'p'}

    # The first file is inspected to find out which variables exist; all files are assumed
    # to share the same set of coordinates.
    coord_variables = []
    for v in get_netcdf_file_variables(filenames[0]):
        if v in axis_lookup:
            coord_variables.append((v, axis_lookup[v]))

    # Everything that has to be read: the coordinates plus the optional user variable.
    all_variables = list(coord_variables)
    if usr_variable is not None:
        all_variables.append((usr_variable, ''))

    logging.info("Listing coordinates: " + str(all_variables))

    names_to_read = [v[0] for v in all_variables]
    var_data = read_many_files_individually(filenames, names_to_read)

    coords = CoordList()
    for name, axis in coord_variables:
        try:
            coords.append(Coord(var_data[name], get_metadata(var_data[name][0]), axis=axis))
        except InvalidVariableError:
            continue

    # Note - the time coordinate is not converted, as it should already be in our 'standard' time unit
    if usr_variable is None:
        return UngriddedCoordinates(coords)
    return UngriddedData(var_data[usr_variable], get_metadata(var_data[usr_variable][0]), coords)
def create_coords(self, filenames, variable=None):
    """
    Read latitude, longitude, altitude, time and value columns from comma-separated text files.

    :param filenames: List of filenames to read
    :param variable: If truthy, the 'value' column is returned as data on top of the coordinates
    :return: UngriddedData when a variable is requested, otherwise UngriddedCoordinates
    :raises IOError: if any of the files cannot be parsed
    :raises InvalidVariableError: if the data object cannot be built from the 'value' column
    """
    from cis.data_io.ungridded_data import Metadata
    # 'nan' is used instead of the 'NaN' alias, which NumPy 2.0 removed.
    from numpy import genfromtxt, nan
    from cis.exceptions import InvalidVariableError
    from cis.time_util import convert_datetime_to_std_time
    import dateutil.parser as du

    array_list = []
    for filename in filenames:
        try:
            array_list.append(genfromtxt(filename, dtype="f8,f8,f8,O,f8",
                                         names=['latitude', 'longitude', 'altitude', 'time', 'value'],
                                         delimiter=',', missing_values='', usemask=True, invalid_raise=True,
                                         converters={"time": du.parse}))
        except Exception:
            # Any parsing failure is reported as an unreadable file; KeyboardInterrupt and
            # SystemExit are no longer swallowed as they were by the bare except.
            raise IOError('Unable to read file ' + filename)

    data_array = utils.concatenate(array_list)
    n_elements = len(data_array['latitude'])

    coords = CoordList()
    coords.append(Coord(data_array["latitude"],
                        Metadata(standard_name="latitude", shape=(n_elements,), units="degrees_north")))
    coords.append(Coord(data_array["longitude"],
                        Metadata(standard_name="longitude", shape=(n_elements,), units="degrees_east")))
    coords.append(Coord(data_array["altitude"],
                        Metadata(standard_name="altitude", shape=(n_elements,), units="meters")))

    # The parsed datetimes are converted to the standard time representation.
    time_arr = convert_datetime_to_std_time(data_array["time"])
    coords.append(Coord(time_arr,
                        Metadata(standard_name="time", shape=(n_elements,),
                                 units="days since 1600-01-01 00:00:00")))

    if not variable:
        return UngriddedCoordinates(coords)

    try:
        return UngriddedData(data_array['value'],
                             Metadata(name="value", shape=(n_elements,), units="unknown", missing_value=nan),
                             coords)
    except Exception:
        # The error is now actually raised. filenames is a list, so it is rendered with str()
        # rather than concatenated directly, which raised TypeError.
        raise InvalidVariableError("Value column does not exist in file " + str(filenames))
def create_coords(self, filenames):
    """
    Build longitude, latitude and time coordinates.

    NOTE(review): this reader is unfinished. ``var_data`` is hard-coded to None, so the first
    subscript below raises TypeError before any coordinate is created, and ``filenames`` is
    never used. ``get_metadata`` is not imported here, so it presumably comes from module
    scope - confirm.

    :param filenames: List of filenames (currently unused)
    :return: UngriddedCoordinates holding longitude, latitude and time, in that order
    """
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates, Metadata

    # FIXME: nothing is read from the files yet; var_data still needs to be populated
    var_data = None

    # Each coordinate takes its metadata from the first entry of the corresponding variable.
    lon = Coord(var_data["longitude"], get_metadata(var_data["longitude"][0]), axis="x")
    lat = Coord(var_data["latitude"], get_metadata(var_data["latitude"][0]), axis="y")
    time = Coord(var_data["time"], get_metadata(var_data["time"][0]), axis="t")
    coords = CoordList([lon, lat, time])

    return UngriddedCoordinates(coords)
def create_coords(self, filenames, usr_variable=None):
    """
    Read the lon/lat/alt/time/p coordinates, assigning standard names, plus an optional data variable.

    :param filenames: List of filenames to read from
    :param usr_variable: Optional name of a data variable to load on top of the coordinates
    :return: UngriddedCoordinates when no variable is given, otherwise UngriddedData
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData
    from cis.data_io.Coord import Coord, CoordList
    from cis.exceptions import InvalidVariableError

    # (name in file, axis, standard name)
    variables = [("lon", "x", 'longitude'), ("lat", "y", 'latitude'), ("alt", "z", 'altitude'),
                 ("time", "t", 'time'), ("p", "p", 'air_pressure')]

    logging.info("Listing coordinates: " + str(variables))

    coords = CoordList()
    for file_name, axis, std_name in variables:
        try:
            var_data = read_many_files_individually(filenames, file_name)[file_name]
            meta = get_metadata(var_data[0])
            meta.standard_name = std_name
            # Some of the variables carry an illegal 'name' attribute, so drop it.
            meta.misc.pop('name', None)
            coord = Coord(var_data, meta, axis=axis)
            if axis == 'z':
                coord.convert_units('m')
            coords.append(coord)
        except InvalidVariableError:
            continue

    # Note - the time coordinate is not converted, as it should already be in our 'standard' time unit
    if usr_variable is None:
        return UngriddedCoordinates(coords)

    usr_var_data = read_many_files_individually(filenames, usr_variable)[usr_variable]
    usr_meta = get_metadata(usr_var_data[0])
    # The same illegal 'name' attribute can appear on data variables.
    usr_meta.misc.pop('name', None)
    return UngriddedData(usr_var_data, usr_meta, coords)
def create_coords(self, filenames, usr_variable=None):
    """
    Read the listed variables from the files as coordinates.

    :param filenames: List of filenames to read from
    :param usr_variable: Accepted for interface compatibility; not used by this implementation
    :return: UngriddedCoordinates built from every listed variable that could be read
    """
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates
    from cis.exceptions import InvalidVariableError

    # NOTE(review): the second element is passed as the axis; "m-1 sr-1" looks like a unit - confirm.
    variables = [("longitude", "x"), ("latitude", "y"), ("altitude", "z"), ("time", "t"),
                 ("aerosol_backscatter_coefficient", "m-1 sr-1")]

    logging.info("Listing coordinates: " + str(variables))

    coords = CoordList()
    for name, axis in variables:
        try:
            var_data = read_many_files_individually(filenames, name)[name]
            coords.append(Coord(var_data, get_metadata(var_data[0]), axis=axis))
        except InvalidVariableError:
            # Variables that raise InvalidVariableError are left out.
            continue

    return UngriddedCoordinates(coords)
def create_coords(self, filenames, variable=None):
    """
    Read coordinates and data, masking CCN column data with its quality flags.

    This overrides the default read-in. The flag mask is applied before the UngriddedData
    object is created, so that missing coordinates are not fixed first.

    :param filenames: List of filenames to read from
    :param variable: Optional name of a variable to load as the data
    :return: UngriddedCoordinates when no variable is given, otherwise UngriddedData
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata
    from cis.utils import apply_mask_to_numpy_array, concatenate
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData

    data_variables, variable_selector = self._load_data(filenames, variable)
    dim_coords = self._create_coordinates_list(data_variables, variable_selector)

    if variable is None:
        return UngriddedCoordinates(dim_coords)

    aux_coord_name = variable_selector.find_auxiliary_coordinate(variable)
    if aux_coord_name is None:
        all_coords = dim_coords
    else:
        all_coords = self._add_aux_coordinate(dim_coords, filenames[0], aux_coord_name,
                                              dim_coords.get_coord(standard_name='time').data.size)

    var_data = data_variables[variable]
    if variable and variable.startswith('CCN_COL'):
        # The flag variable shares the final character of the column name.
        ccn_flag_var = "COL{}_FLAG".format(variable[-1])
        flag_vars = read_many_files_individually(filenames, ccn_flag_var)[ccn_flag_var]
        flags = concatenate([get_data(v) for v in flag_vars])
        # Flag values 0 and 1 are both OK; anything above is masked out.
        mask = flags > 1
        values = concatenate([get_data(v) for v in var_data])
        var_data = apply_mask_to_numpy_array(values, mask)

    return UngriddedData(var_data, get_metadata(data_variables[variable][0]), all_coords)
def create_coords(self, filenames, usr_variable=None):
    """
    Read the listed variables from the files as coordinates.

    :param filenames: List of filenames to read from
    :param usr_variable: Accepted for interface compatibility; not used by this implementation
    :return: UngriddedCoordinates built from every listed variable that could be read
    """
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedCoordinates
    from cis.exceptions import InvalidVariableError

    # Every entry must be a (name, axis) pair. The last three used to be bare parenthesised
    # strings, so indexing them picked single characters and the variables were never read.
    # They carry no axis, hence the empty string.
    # NOTE(review): "RH", "Pa" and "K" are passed as the axis but look like units - confirm.
    # NOTE(review): "air_temprature" and "Wind Diretion" look misspelled; they are left
    # unchanged because they must match the variable names in the files - confirm.
    variables = [("longitude", "x"), ("latitude", "y"), ("altitude", "z"), ("time", "t"),
                 ("relative_humidity", "RH"), ("surface_air_pressure", "Pa"), ("air_temprature", "K"),
                 ("wind_speed", ""), ("Wind Diretion", ""), ("rainfall_rate", "")]

    logging.info("Listing coordinates: " + str(variables))

    coords = CoordList()
    for name, axis in variables:
        try:
            var_data = read_many_files_individually(filenames, name)[name]
            coords.append(Coord(var_data, get_metadata(var_data[0]), axis=axis))
        except InvalidVariableError:
            # Best effort: variables that raise InvalidVariableError are left out.
            pass

    return UngriddedCoordinates(coords)
def create_coords(self, filenames, variable=None):
    """
    Build the coordinates for the given files.

    :param filenames: List of filenames to read coordinates from
    :param variable: Accepted for interface compatibility; not used
    :return: UngriddedCoordinates wrapping the coordinate list
    """
    coord_list = self._create_coord_list(filenames)
    return UngriddedCoordinates(coord_list)
def create_coords(self, filenames, variable=None):
    """
    Build the coordinates for the given files using the parent implementation with index_offset=1.

    :param filenames: List of filenames to read coordinates from
    :param variable: Accepted for interface compatibility; not used
    :return: UngriddedCoordinates wrapping the coordinate list
    """
    parent = super(Caliop_L2, self)
    coord_list = parent._create_coord_list(filenames, index_offset=1)
    return UngriddedCoordinates(coord_list)
class TestUngriddedCoordinates(TestCase):
    """Tests for UngriddedCoordinates built from a 5x3 latitude/longitude grid."""

    def setUp(self):
        """Create the shared, fully populated 15-point fixture."""
        lat_values = np.arange(-10, 11, 5)
        lon_values = np.arange(-5, 6, 5)
        lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)
        self.x = Coord(lat_grid, Metadata(standard_name='latitude', units='degrees'))
        self.y = Coord(lon_grid, Metadata(standard_name='longitude', units='degrees'))
        self.ug = UngriddedCoordinates(CoordList([self.x, self.y]))

    @staticmethod
    def _make_coords_with_one_masked_longitude():
        """Return UngriddedCoordinates on the same grid with one longitude value masked."""
        lat_values = np.arange(-10, 11, 5)
        lon_values = np.arange(-5, 6, 5)
        lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)
        lon_grid = np.ma.masked_array(lon_grid, np.zeros(lon_grid.shape, dtype=bool))
        lon_grid.mask[1, 2] = True
        lat = Coord(lat_grid, Metadata(standard_name='latitude', units='degrees'))
        lon = Coord(lon_grid, Metadata(standard_name='longitude', units='degrees'))
        return UngriddedCoordinates(CoordList([lat, lon]))

    def test_can_create_ungridded_coordinates(self):
        found = self.ug.coords().find_standard_coords()
        assert found == [self.x, self.y, None, None, None]

    def test_get_coordinates_points_returns_points(self):
        point_count = sum(1 for _ in self.ug.get_coordinates_points())
        assert point_count == 15

    def test_GIVEN_missing_coord_values_WHEN_coords_THEN_missing_values_removed(self):
        ug = self._make_coords_with_one_masked_longitude()
        # One of the 15 points is masked, so 14 remain in every coordinate.
        for coord in ug.coords():
            assert_that(len(coord.data), is_(14))

    def test_GIVEN_missing_coord_values_WHEN_coords_flattened_THEN_missing_values_removed(self):
        ug = self._make_coords_with_one_masked_longitude()
        for coord in ug.coords_flattened:
            # Slots for coordinates that were not supplied are None and are skipped.
            if coord is None:
                continue
            assert_that(len(coord), is_(14))

    @skip_pandas
    def test_GIVEN_ungridded_coords_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        frame = self.ug.as_data_frame()
        assert_that(frame['latitude'][13] == 10)
        assert_that(frame['longitude'][0] == -5)

    @skip_pandas
    def test_GIVEN_ungridded_coords_with_time_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        from datetime import datetime

        source_data = make_regular_2d_with_time_ungridded_data()
        frame = UngriddedCoordinates(source_data._coords).as_data_frame()
        assert_that(frame['latitude'][13] == 10)
        assert_that(frame['longitude'][0] == -5)
        # The frame must also be addressable with a datetime key.
        assert_that(frame['longitude'][datetime(1984, 8, 28)] == 0)
class TestUngriddedCoordinates(TestCase):
    """Exercises creation, point iteration, masking and data-frame export of UngriddedCoordinates."""

    def setUp(self):
        """Prepare latitude/longitude coordinates on a 5x3 mesh (15 points)."""
        lats_1d = np.arange(-10, 11, 5)
        lons_1d = np.arange(-5, 6, 5)
        lons_2d, lats_2d = np.meshgrid(lons_1d, lats_1d)
        self.x = Coord(lats_2d, Metadata(standard_name='latitude', units='degrees'))
        self.y = Coord(lons_2d, Metadata(standard_name='longitude', units='degrees'))
        coord_list = CoordList([self.x, self.y])
        self.ug = UngriddedCoordinates(coord_list)

    def test_can_create_ungridded_coordinates(self):
        expected = [self.x, self.y, None, None, None]
        assert self.ug.coords().find_standard_coords() == expected

    def test_get_coordinates_points_returns_points(self):
        collected = list(self.ug.get_coordinates_points())
        assert len(collected) == 15

    def test_GIVEN_missing_coord_values_WHEN_coords_THEN_missing_values_removed(self):
        lats_1d = np.arange(-10, 11, 5)
        lons_1d = np.arange(-5, 6, 5)
        lons_2d, lats_2d = np.meshgrid(lons_1d, lats_1d)
        # Mask exactly one longitude value.
        lons_2d = np.ma.masked_array(lons_2d, np.zeros(lons_2d.shape, dtype=bool))
        lons_2d.mask[1, 2] = True
        lat_coord = Coord(lats_2d, Metadata(standard_name='latitude', units='degrees'))
        lon_coord = Coord(lons_2d, Metadata(standard_name='longitude', units='degrees'))
        ug = UngriddedCoordinates(CoordList([lat_coord, lon_coord]))

        for coord in ug.coords():
            assert_that(len(coord.data), is_(14))

    def test_GIVEN_missing_coord_values_WHEN_coords_flattened_THEN_missing_values_removed(self):
        lats_1d = np.arange(-10, 11, 5)
        lons_1d = np.arange(-5, 6, 5)
        lons_2d, lats_2d = np.meshgrid(lons_1d, lats_1d)
        # Mask exactly one longitude value.
        lons_2d = np.ma.masked_array(lons_2d, np.zeros(lons_2d.shape, dtype=bool))
        lons_2d.mask[1, 2] = True
        lat_coord = Coord(lats_2d, Metadata(standard_name='latitude', units='degrees'))
        lon_coord = Coord(lons_2d, Metadata(standard_name='longitude', units='degrees'))
        ug = UngriddedCoordinates(CoordList([lat_coord, lon_coord]))

        # None entries (coordinates that were not supplied) are skipped.
        for flat in ug.coords_flattened:
            if flat is not None:
                assert_that(len(flat), is_(14))

    @skip_pandas
    def test_GIVEN_ungridded_coords_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        table = self.ug.as_data_frame()
        assert_that(table['latitude'][13] == 10)
        assert_that(table['longitude'][0] == -5)

    @skip_pandas
    def test_GIVEN_ungridded_coords_with_time_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        from datetime import datetime

        coords_only = UngriddedCoordinates(make_regular_2d_with_time_ungridded_data()._coords)
        table = coords_only.as_data_frame()
        assert_that(table['latitude'][13] == 10)
        assert_that(table['longitude'][0] == -5)
        assert_that(table['longitude'][datetime(1984, 8, 28)] == 0)