def test_dimensions(self):
    """
    Test that dimensions can be produced in a dataset, and successfully
    be read back from another read of the file.
    """
    writer = NetCDFWriter()
    writer.dimension("short", np.int16, 1)
    writer.dimension("medium", np.int32, 10)
    writer.dimension("long", np.int64, 100)

    filepath = path.join(WRITE_OUTPUT_DIR, "dimensions.nc")
    writer.write(filepath)

    # Reopen the file with a trusted NetCDF reader.
    dataset = Dataset(filepath, "r")
    dimensions = dataset.dimensions
    self.assertEqual(3, len(dimensions))

    # Each dimension must be present with exactly the length it was
    # given when it was written.
    expected_lengths = {"short": 1, "medium": 10, "long": 100}
    for dim_name, dim_length in expected_lengths.items():
        self.assertIn(dim_name, dimensions)
        self.assertEqual(dim_length, len(dimensions[dim_name]))

    dataset.close()
def test_no_data(self):
    """
    Test that an empty NetCDF dataset can be written to a directory,
    without any dimensions, variables, attributes, or data.
    """
    # A freshly constructed writer holds nothing at all; writing it
    # should still succeed and produce a (minimal) file.
    empty_writer = NetCDFWriter()
    destination = path.join(WRITE_OUTPUT_DIR, "empty.nc")
    empty_writer.write(destination)
def test_dimension_variables(self):
    """
    Test that creating a dimension also creates the corresponding
    variable, and provides reasonable values for it.

    Tests that these variables can be read successfully on a later read
    of the file.
    """
    writer = NetCDFWriter()
    writer.dimension("x", np.float32, 10, (0, 10))
    writer.dimension("y", np.int32, 15, (10, 40))

    filepath = path.join(WRITE_OUTPUT_DIR, "cartesian.nc")
    writer.write(filepath)

    # Read the file a second time with a trusted NetCDF reader.
    ds = Dataset(filepath)
    # Named "variables" rather than "vars" to avoid shadowing the
    # builtin vars().
    variables = ds.variables
    self.assertEqual(2, len(variables))
    self.assertIn("x", variables)
    self.assertIn("y", variables)

    # Check that dimension variable properties are correct.
    self.assertEqual(10, variables["x"].size)
    self.assertEqual(np.float32, variables["x"].dtype)
    self.assertEqual(15, variables["y"].size)
    self.assertEqual(np.int32, variables["y"].dtype)

    # Check that the dimension variables have the right range of values,
    # from their lower limit to their upper limit in even increments.
    # x spans (0, 10) in 10 steps -> cell centres at i + 0.5.
    for i in range(10):
        self.assertEqual(i + 0.5, variables["x"][i])
    # y spans (10, 40) in 15 steps -> cell centres at 2*i + 11.
    for i in range(15):
        self.assertEqual(2 * i + 11, variables["y"][i])

    ds.close()
def test_produces_file(self):
    """
    Test that writing a dataset produces a new file in the right
    location.
    """
    writer = NetCDFWriter()
    filepath = path.join(WRITE_OUTPUT_DIR, "single_point.nc")

    # Clear out any leftover file of the same name, so the final
    # assertion proves that this write actually created the file.
    try:
        remove(filepath)
    except OSError:
        # The file did not exist in the first place; nothing to do.
        pass

    writer.write(filepath)
    self.assertTrue(path.isfile(filepath))
def __init__(self: 'ModelOutput', data: List['LatLongGrid']) -> None:
    """
    Instantiate a new ModelOutput object.

    Model data is provided through the data parameter, as a list of
    grid objects. Each grid in the list represents one segment of time,
    such as a month or a season. All grids must have the same
    dimensions.

    :param data: A list of latitude-longitude grids of data
    """
    # Make sure the shared parent output directory exists before any
    # output is produced.
    Path(OUTPUT_FULL_PATH).mkdir(exist_ok=True)

    self._data = data
    # Grid dimensions are assumed uniform, so the first grid is
    # representative of all of them.
    self._grid = data[0].dimensions()
    self._dataset = NetCDFWriter()
def test_global_attributes(self):
    """
    Test that global dataset attributes can be produced, and
    successfully read back from another read of the file.
    """
    writer = NetCDFWriter()
    # The three global attributes to round-trip through the file.
    expected_attrs = {
        "description": "A unittest case",
        "source": "Arrhenius project unittests",
        "history": "Written June 27, 2018",
    }
    for attr_name, attr_value in expected_attrs.items():
        writer.global_attribute(attr_name, attr_value)

    filepath = path.join(WRITE_OUTPUT_DIR, "global_attrs.nc")
    writer.write(filepath)

    # Read back the file with a trusted NetCDF library.
    ds = Dataset(filepath, "r")

    # Exactly the three attributes written above must be present, with
    # the same values.
    self.assertEqual(3, len(vars(ds)))
    self.assertEqual(ds.description, "A unittest case")
    self.assertEqual(ds.source, "Arrhenius project unittests")
    self.assertEqual(ds.history, "Written June 27, 2018")

    ds.close()
def test_unlimited_dimensions(self):
    """
    Test that providing a None value for dimension length produces an
    unlimited variable that can be successfully read back in later.
    """
    writer = NetCDFWriter()
    writer.dimension("inf", np.int8, None)

    destination = path.join(WRITE_OUTPUT_DIR, "unlimited.nc")
    writer.write(destination)

    # Reopen the file to confirm the intended changes persisted.
    reread = Dataset(destination)
    dimensions = reread.dimensions

    # The lone dimension must be present and flagged as unlimited.
    self.assertEqual(1, len(dimensions))
    self.assertIn("inf", dimensions)
    self.assertTrue(dimensions["inf"].isunlimited())

    reread.close()
def test_variable_attributes(self):
    """
    Test that attributes can be added to an existing variable, and that
    they can be successfully read back and accessed in future reads of
    the dataset.
    """
    writer = NetCDFWriter()
    writer.dimension("x", np.int16, 100)
    writer.variable("dimensionless", np.int8, [])
    writer.variable("dimensional", np.int16, ["x"])
    writer.variable("no_attrs", np.int32, ["x"])

    # Give every variable arbitrary data so the write can succeed.
    writer.data("dimensionless", np.array(5))
    writer.data("dimensional", np.array(range(100)))
    writer.data("no_attrs", np.array(range(100, 400, 3)))

    # Attach attributes to two of the three variables; no_attrs is
    # deliberately left bare.
    attr_table = [
        ("dimensionless", "description", "No data available"),
        ("dimensionless", "units", "No units"),
        ("dimensional", "description", "A coordinate on the x-axis"),
        ("dimensional", "units", "meters"),
    ]
    for var_name, attr_name, attr_value in attr_table:
        writer.variable_attribute(var_name, attr_name, attr_value)

    filepath = path.join(WRITE_OUTPUT_DIR, "var_attrs.nc")
    writer.write(filepath)

    # Read back the three variables using a trusted library.
    ds = Dataset(filepath)
    dimless = ds.variables["dimensionless"]
    dimful = ds.variables["dimensional"]
    no_attrs = ds.variables["no_attrs"]

    # Ensure each variable has exactly the right number of attributes,
    # and that these attributes have the right values.
    self.assertEqual(2, len(vars(dimless)))
    self.assertEqual("No data available", dimless.description)
    self.assertEqual("No units", dimless.units)

    self.assertEqual(2, len(vars(dimful)))
    self.assertEqual("A coordinate on the x-axis", dimful.description)
    self.assertEqual("meters", dimful.units)

    self.assertEqual(0, len(vars(no_attrs)))

    ds.close()
def test_variable_dimensions(self):
    """
    Test that variables can be created with varying numbers of
    dimensions, and that they accept data with the corresponding number
    of dimensions.

    Test reading back data to ensure proper persistence.
    """
    writer = NetCDFWriter()
    writer.dimension("x", np.int16, 10)
    writer.dimension("y", np.int16, 10)

    # Examples with 0, 1, and multiple dimensions.
    writer.variable("no_dims", np.int16, [])
    writer.variable("line", np.int16, ["x"])
    writer.variable("plane", np.int16, ["x", "y"])

    # Each variable receives data of the same number of dimensions as
    # the variable itself.
    writer.data("no_dims", np.array(1))
    writer.data("line", np.array(range(10)))
    writer.data("plane", np.array([range(i, i + 10) for i in range(10)]))

    filepath = path.join(WRITE_OUTPUT_DIR, "cartesian2.nc")
    writer.write(filepath)

    # Read the dataset back in with a trusted library.
    ds = Dataset(filepath)
    # Named "variables" rather than "vars" to avoid shadowing the
    # builtin vars().
    variables = ds.variables

    # Ensure only the right variables are present (the two dimension
    # variables plus the three added manually).
    self.assertEqual(5, len(variables))
    for var_name in ("x", "y", "no_dims", "line", "plane"):
        self.assertIn(var_name, variables)

    # Proper variable values for the 0-dimensional variable.
    self.assertEqual(1, variables["no_dims"].size)
    self.assertEqual(1, variables["no_dims"][:])

    # Proper variable values for the 1-dimensional variable.
    self.assertEqual(10, variables["line"].size)
    self.assertEqual((10, ), variables["line"].shape)
    self.assertEqual(0, variables["line"][0])
    self.assertEqual(4, variables["line"][4])
    self.assertEqual(9, variables["line"][9])

    # Proper variable values for the 2-dimensional variable, where
    # plane[i][j] == i + j by construction above.
    self.assertEqual(100, variables["plane"].size)
    self.assertEqual((10, 10), variables["plane"].shape)
    self.assertEqual(0, variables["plane"][0, 0])
    self.assertEqual(5, variables["plane"][0, 5])
    self.assertEqual(2, variables["plane"][1, 1])
    self.assertEqual(6, variables["plane"][2, 4])
    self.assertEqual(12, variables["plane"][5, 7])
    self.assertEqual(11, variables["plane"][8, 3])
    self.assertEqual(9, variables["plane"][9, 0])
    self.assertEqual(18, variables["plane"][9, 9])

    ds.close()
def test_variable_attribute_requires_variable(self):
    """
    Test that an error is raised when an attribute is given to a
    variable that does not exist.
    """
    writer = NetCDFWriter()

    # Case 1: a completely empty dataset.
    with self.assertRaises(ValueError):
        writer.variable_attribute("dummy", "description",
                                  "A dummy variable")

    # Case 2: a dimension of the right name is not a variable.
    writer.dimension("dummy", np.int32, 1)
    with self.assertRaises(ValueError):
        writer.variable_attribute("dummy", "description",
                                  "A dummy variable")

    # Case 3: a variable exists, but under a different name.
    writer.variable("fake", np.int32, ["dummy"])
    with self.assertRaises(ValueError):
        writer.variable_attribute("dummy", "description",
                                  "A dummy variable")

    # Once a correctly-named variable exists, no error is raised.
    writer.variable("dummy", np.int32, ["dummy"])
    writer.variable_attribute("dummy", "description", "A dummy variable")
def test_variable_requires_dimension(self):
    """
    Test that an error is raised when a variable is created that refers
    to a dimension that does not exist.
    """
    writer = NetCDFWriter()

    # Case 1: an empty dataset has no dimensions at all.
    with self.assertRaises(ValueError):
        writer.variable("dummy", np.int32, ["dummy"])

    # Case 2: a dimension exists, but under a different name.
    writer.dimension("fake", np.int32, 5)
    with self.assertRaises(ValueError):
        writer.variable("dummy", np.int32, ["dummy"])

    # Once the correctly-named dimension exists, no error is raised.
    writer.dimension("dummy", np.int32, 4)
    writer.variable("dummy", np.int32, ["dummy"])
def test_data_requires_variable(self):
    """
    Test that an error is raised when data is submitted to a variable
    that does not exist.
    """
    writer = NetCDFWriter()

    # Case 1: no variables at all.
    with self.assertRaises(KeyError):
        writer.data("dummy", np.array([1]))

    # Case 2: a dimension of the same name does not count as a
    # variable.
    writer.dimension("dummy", np.int32, 1)
    with self.assertRaises(KeyError):
        writer.data("dummy", np.array([1]))

    # Case 3: a variable exists, but under a different name.
    writer.variable("fake", np.int32, ["dummy"])
    with self.assertRaises(KeyError):
        writer.data("dummy", np.array([1]))

    # Once the correctly-named variable exists, no error is raised.
    writer.variable("dummy", np.int32, ["dummy"])
    writer.data("dummy", np.array([1]))
class ModelOutput:
    """
    A general-purpose center for all forms of output. Responsible for
    organization of program output into folders.

    The output of a program is defined as the temperature data produced
    by a run of the model. This data may be saved in the form of a data
    file (such as a NetCDF file) and/or as image representations, and/or
    in other data formats. All of these output types are produced
    side-by-side, and stored in their own directory to keep data
    separate from different model runs.
    """

    def __init__(self: 'ModelOutput', data: List['LatLongGrid']) -> None:
        """
        Instantiate a new ModelOutput object.

        Model data is provided through the data parameter, through a
        list of grid objects. Each grid in the list represents a segment
        of time, such as a month or a season. All grids must have the
        same dimensions.

        :param data: A list of latitude-longitude grids of data
        """
        # Create output directory if it does not already exist.
        parent_out_dir = Path(OUTPUT_FULL_PATH)
        parent_out_dir.mkdir(exist_ok=True)

        self._data = data
        # All grids share dimensions, so the first is representative.
        self._grid = data[0].dimensions()
        self._dataset = NetCDFWriter()

    def write_dataset(self: 'ModelOutput',
                      data: List['LatLongGrid'],
                      dir_path: str,
                      dataset_name: str) -> None:
        """
        Produce a NetCDF dataset, with the name given by dataset_name.nc,
        containing the variables in the data parameter that the output
        controller allows. The dataset will be written to the specified
        path in the file system.

        The dataset contains all the dimensions that are used in the
        data (e.g. time, latitude, longitude) as well as variables
        including final temperature, temperature change, humidity, etc.
        according to which of the ReportDatatype output types are
        enabled in the current output controller.

        :param data: Output from an Arrhenius model run
        :param dir_path: The directory where the dataset will be written
        :param dataset_name: The name of the dataset
        """
        # Write the data out to a NetCDF file in the output directory.
        grid_by_count = self._grid.dims_by_count()
        output_path = path.join(dir_path, dataset_name)

        global_output_center().submit_output(Debug.PRINT_NOTICES,
                                             "Writing NetCDF dataset...")

        # Fix: the two adjacent string literals previously read
        # "Output for an" "Arrhenius model run.", which concatenates to
        # "Output for anArrhenius model run." (missing space). The
        # dangling line-continuation backslash after the final
        # .dimension(...) call has also been removed.
        self._dataset.global_attribute("description", "Output for an "
                                       "Arrhenius model run.")\
            .dimension('time', np.int32, len(data), (0, len(data)))\
            .dimension('latitude', np.int32, grid_by_count[0], (-90, 90))\
            .dimension('longitude', np.int32, grid_by_count[1],
                       (-180, 180))

        # Submit each report variable to the output controller, which
        # decides whether it actually lands in the dataset.
        for output_type in ReportDatatype:
            variable_data = \
                extract_multidimensional_grid_variable(data,
                                                       output_type.value)
            global_output_center().submit_output(output_type,
                                                 variable_data,
                                                 output_type.value)

        self._dataset.write(output_path)

    def write_dataset_variable(self: 'ModelOutput',
                               data: np.ndarray,
                               data_type: str) -> None:
        """
        Prepare to write data into a variable by the name of data_type
        in this instance's NetCDF dataset file. Apply this variable's
        dimensions and type, along with several attributes.

        :param data: A single-variable grid taken from Arrhenius model
                     output
        :param data_type: The name of the variable as it will appear in
                          the dataset
        """
        # Dimension names keyed by the rank (ndim) of the incoming data.
        dims_map = [[],
                    ['latitude'],
                    ['latitude', 'longitude'],
                    ['time', 'latitude', 'longitude'],
                    ['time', 'level', 'latitude', 'longitude']]

        global_output_center().submit_output(
            Debug.PRINT_NOTICES,
            "Writing {} to dataset".format(data_type)
        )

        variable_type = VARIABLE_METADATA[data_type][VAR_TYPE]
        self._dataset.variable(data_type, variable_type,
                               dims_map[data.ndim])

        # Attach all of the variable's metadata attributes.
        for attr, val in VARIABLE_METADATA[data_type][VAR_ATTRS].items():
            self._dataset.variable_attribute(data_type, attr, val)

        self._dataset.data(data_type, data)

    def write_images(self: 'ModelOutput',
                     data: List['LatLongGrid'],
                     output_path: str,
                     run_id: str = "") -> None:
        """
        Produce a series of maps displaying some of the results of an
        Arrhenius model run according to what variable the output
        controller allows. Images are stored in a directory given by
        output_path.

        One image will be produced per time segment per variable for
        which output is allowed by the output controller, based on which
        ReportDatatype output types are enabled. The optional argument
        run_id specifies a prefix that will be added to each of the
        image files to identify which model run they belong to.

        :param data: The output from an Arrhenius model run
        :param output_path: The directory where image files will be
                            stored
        :param run_id: A prefix that will start off the names of all
                       the image files
        """
        output_controller = global_output_center()

        # Attempt to output images for each variable output type.
        for output_type in ReportDatatype:
            variable_name = output_type.value
            variable = extract_multidimensional_grid_variable(
                data, variable_name)
            img_type_path = path.join(output_path, variable_name)

            output_controller.submit_output(output_type, variable,
                                            img_type_path,
                                            variable_name, run_id)

    def write_output(self: 'ModelOutput', run_title: str) -> None:
        """
        Produce NetCDF data files and image files from the provided
        data, and a directory with the name dir_name to hold them.

        One image file is created per time segment in the data. In the
        case of Arrhenius' model, this is one per season. Only one
        NetCDF data file is produced, in which all time segments are
        present.
        """
        # Create a directory for this model output if none exists
        # already.
        out_dir_path = path.join(OUTPUT_FULL_PATH, run_title)
        out_dir = Path(out_dir_path)
        out_dir.mkdir(exist_ok=True)

        output_controller = global_output_center()
        output_controller.submit_collection_output((DATASET_VARS, ),
                                                   self._data,
                                                   out_dir_path,
                                                   run_title + ".nc")
        output_controller.submit_collection_output((IMAGES, ),
                                                   self._data,
                                                   out_dir_path,
                                                   run_title)