class TestCF(BaseTestCase):
    '''
    Tests for the CF checks exposed by CFBaseCheck.

    Each test loads one or more static netCDF fixtures, runs a single check
    and compares the scores and messages with the expected values.
    '''

    def setUp(self):
        '''
        Create a fresh CFBaseCheck instance for every test
        '''
        self.cf = CFBaseCheck()

    # --------------------------------------------------------------------------------
    # Helper Methods
    # --------------------------------------------------------------------------------

    def new_nc_file(self):
        '''
        Make a new temporary netCDF file for the scope of the test

        :raises IOError: if the temporary file already exists
        :return: the dataset opened for writing
        '''
        nc_file_path = os.path.join(gettempdir(), 'example.nc')
        if os.path.exists(nc_file_path):
            raise IOError('File Exists: %s' % nc_file_path)
        nc = Dataset(nc_file_path, 'w')
        # unittest runs cleanups in reverse order of registration, so the
        # dataset is closed before the file is removed.
        self.addCleanup(os.remove, nc_file_path)
        self.addCleanup(nc.close)
        return nc

    def load_dataset(self, nc_dataset):
        '''
        Return a loaded NC Dataset for the given path

        :param str nc_dataset: path to the netCDF file
        :raises ValueError: if nc_dataset is not a string
        :return: the dataset opened read-only; it is closed on test cleanup
        '''
        if not isinstance(nc_dataset, str):
            raise ValueError("nc_dataset should be a string")
        nc_dataset = Dataset(nc_dataset, 'r')
        self.addCleanup(nc_dataset.close)
        return nc_dataset

    def get_results(self, results):
        '''
        Returns a tuple of the value scored, possible, and a list of messages
        in the result set.

        A result whose value is a tuple contributes (scored, out_of); any
        other value counts as one check worth int(value) points.
        '''
        out_of = 0
        scored = 0
        messages = []
        # Single pass so that one-shot iterables are handled as well
        for r in results:
            if isinstance(r.value, tuple):
                scored += r.value[0]
                out_of += r.value[1]
            else:
                scored += int(r.value)
                out_of += 1
            # Store the messages
            messages.extend(r.msgs)
        return scored, out_of, messages

    # --------------------------------------------------------------------------------
    # Compliance Tests
    # --------------------------------------------------------------------------------

    def test_check_data_types(self):
        """
        2.2 The netCDF data types char, byte, short, int, float or real, and double are all acceptable
        """
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_data_types(dataset)
        assert result.value[0] == result.value[1]

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_data_types(dataset)
        assert result.msgs[0] == 'The variable temp failed because the datatype is int64'
        assert result.value == (6, 7)

    def test_naming_conventions(self):
        '''
        Section 2.3 Naming Conventions

        Variable, dimension and attribute names should begin with a letter and be composed of letters, digits, and underscores.
        '''
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        results = self.cf.check_naming_conventions(dataset)
        num_var = len(dataset.variables)
        result_dict = {result.name: result for result in results}
        result = result_dict[u'§2.3 Naming Conventions for variables']
        assert result.value == (num_var, num_var)

        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_naming_conventions(dataset)
        result_dict = {result.name: result for result in results}
        result = result_dict[u'§2.3 Naming Conventions for variables']
        assert result.value == (13, 14)
        assert u'variable _poor_dim should begin with a letter and be composed of letters, digits, and underscores' == result.msgs[0]
        score, out_of, messages = self.get_results(results)
        assert (score, out_of) == (49, 51)

        dataset = self.load_dataset(STATIC_FILES['chap2'])
        results = self.cf.check_naming_conventions(dataset)
        result_dict = {result.name: result for result in results}
        result = result_dict[u'§2.3 Naming Conventions for variables']
        assert result.value == (6, 7)
        assert u'variable bad name should begin with a letter and be composed of letters, digits, and underscores' == result.msgs[0]

        result = result_dict[u'§2.3 Naming Conventions for attributes']
        assert result.msgs[0] == ('attribute no_reason:_bad_attr should begin with a letter and be '
                                  'composed of letters, digits, and underscores')
        assert result.msgs[1] == ('global attribute bad global should begin with a letter and be '
                                  'composed of letters, digits, and underscores')
        score, out_of, messages = self.get_results(results)
        assert (score, out_of) == (16, 19)

    def test_check_names_unique(self):
        """
        2.3 names should not be distinguished purely by case, i.e., if case is disregarded, no two names should be the same.
        """
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_names_unique(dataset)
        num_var = len(dataset.variables)
        expected = (num_var,) * 2
        self.assertEqual(result.value, expected)

        dataset = self.load_dataset(STATIC_FILES['chap2'])
        result = self.cf.check_names_unique(dataset)
        assert result.value == (6, 7)
        assert result.msgs[0] == 'Variables are not case sensitive. Duplicate variables named: not_unique'

    def test_check_dimension_names(self):
        """
        2.4 A variable may have any number of dimensions, including zero, and the dimensions must all have different names.
        """
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_dimension_names(dataset)
        assert result.value == (6, 7)

        dataset = self.load_dataset(STATIC_FILES['chap2'])
        result = self.cf.check_dimension_names(dataset)
        assert result.msgs[0] == u'no_reason has two or more dimensions named time'

    def test_check_dimension_order(self):
        """
        2.4 If any or all of the dimensions of a variable have the interpretations of "date or time" (T), "height or depth" (Z),
        "latitude" (Y), or "longitude" (X) then we recommend, those dimensions to appear in the relative order T, then Z, then Y,
        then X in the CDL definition corresponding to the file. All other dimensions should, whenever possible, be placed to the
        left of the spatiotemporal dimensions.
        """
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_dimension_order(dataset)
        assert result.value == (5, 6)
        assert result.msgs[0] == ("really_bad's dimensions are not in the recommended order "
                                  "T, Z, Y, X. They are latitude, power")

    def test_check_fill_value_outside_valid_range(self):
        """
        2.5.1 The _FillValue should be outside the range specified by valid_range (if used) for a variable.
        """
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_fill_value_outside_valid_range(dataset)
        assert result.msgs[0] == ('salinity:_FillValue (1.0) should be outside the '
                                  'range specified by valid_min/valid_max (-10, 10)')

        dataset = self.load_dataset(STATIC_FILES['chap2'])
        result = self.cf.check_fill_value_outside_valid_range(dataset)
        assert result.value == (1, 2)
        assert result.msgs[0] == ('wind_speed:_FillValue (12.0) should be outside the '
                                  'range specified by valid_min/valid_max (0.0, 20.0)')

    def test_check_conventions_are_cf_16(self):
        """
        2.6.1 the NUG defined global attribute Conventions to the string value "CF-1.6"
        """
        # :Conventions = "CF-1.6"
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_conventions_are_cf_16(dataset)
        self.assertTrue(result.value)

        # :Conventions = "CF-1.6 ,ACDD" ;
        dataset = self.load_dataset(STATIC_FILES['conv_multi'])
        result = self.cf.check_conventions_are_cf_16(dataset)
        self.assertTrue(result.value)

        # :Conventions = "NoConvention"
        dataset = self.load_dataset(STATIC_FILES['conv_bad'])
        result = self.cf.check_conventions_are_cf_16(dataset)
        self.assertFalse(result.value)
        assert result.msgs[0] == ('Conventions global attribute does not contain '
                                  '"CF-1.6". The CF Checker only supports CF-1.6 '
                                  'at this time.')

    def test_check_convention_globals(self):
        """
        2.6.2 title/history global attributes, must be strings. Do not need to exist.
        """
        # check for pass
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_convention_globals(dataset)
        assert result.value == (2, 2)

        # check that a failure is reported for this dataset (the first
        # message is about the title global attribute)
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_convention_globals(dataset)
        assert result.value == (0, 2)
        assert result.msgs[0] == 'global attribute title should exist and be a non-empty string'

    def test_check_convention_possibly_var_attrs(self):
        """
        3.1 The units attribute is required for all variables that represent dimensional quantities
        (except for boundary variables defined in Section 7.1, "Cell Boundaries" and climatology variables
        defined in Section 7.4, "Climatological Statistics").

        Units are not required for dimensionless quantities. A variable with no units attribute is assumed
        to be dimensionless. However, a units attribute specifying a dimensionless unit may optionally be
        included.

        - units required
        - type must be recognized by udunits
        - if std name specified, must be consistent with standard name table, must also be consistent with a
          specified cell_methods attribute if present
        """
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_convention_possibly_var_attrs(dataset)
        # 10x comment attrs
        # 1x institution
        # 1x source
        # 1x EMPTY references
        assert result.value == (15, 16)
        assert result.msgs[0] == "references global attribute should be a non-empty string"

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_convention_possibly_var_attrs(dataset)
        # no references
        # institution is a 10L
        # no source
        # comments don't matter unless they're empty
        assert result.value == (1, 4)
        assert result.msgs[0] == 'salinity:institution should be a non-empty string'
        assert result.msgs[1] == 'source should be defined'
        assert result.msgs[2] == 'references should be defined'

    def test_check_standard_name(self):
        """
        3.3 A standard name is associated with a variable via the attribute standard_name which
        takes a string value comprised of a standard name optionally followed by one or more blanks and a
        standard name modifier
        """
        dataset = self.load_dataset(STATIC_FILES['2dim'])
        result = self.cf.check_standard_name(dataset)
        for each in result:
            self.assertTrue(each.value)

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_standard_name(dataset)
        result_dict = {result.name: result for result in results}

        result = result_dict[u'§3.2 Either long_name or standard_name is highly recommended for variable time']
        assert result.value == (0, 1)
        assert "Attribute long_name or/and standard_name is highly recommended for variable time" in result.msgs

        result = result_dict[u'§3.2 Either long_name or standard_name is highly recommended for variable latitude']
        assert result.value == (0, 1)
        assert "Attribute long_name or/and standard_name is highly recommended for variable latitude" in result.msgs
        #assert 'variable latitude\'s attribute standard_name must be a non-empty string or it should define a long_name attribute.' == result.msgs[0]

        result = result_dict[u'§3.3 Variable salinity has valid standard_name attribute']
        assert result.value == (1, 2)
        assert 'standard_name Chadwick is not defined in Standard Name Table' in result.msgs[0]

        result = result_dict[u'§3.3 standard_name modifier for salinity is valid']
        assert result.value == (0, 1)

        assert len(result_dict) == 9

        dataset = self.load_dataset(STATIC_FILES['reduced_horizontal_grid'])
        results = self.cf.check_standard_name(dataset)
        score, out_of, messages = self.get_results(results)
        # Make sure that the rgrid coordinate variable isn't checked for standard_name
        # time, lat, lon exist with three checks each
        assert (score, out_of) == (11, 11)

    def test_cell_bounds(self):
        '''
        Test the cell boundaries check against good and malformed bounds
        '''
        dataset = self.load_dataset(STATIC_FILES['grid-boundaries'])
        results = self.cf.check_cell_boundaries(dataset)
        score, out_of, messages = self.get_results(results)
        assert (score, out_of) == (2, 2)

        # The next two datasets are only run through the check; nothing is
        # asserted about their results.
        dataset = self.load_dataset(STATIC_FILES['cf_example_cell_measures'])
        results = self.cf.check_cell_boundaries(dataset)

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_cell_boundaries(dataset)

        dataset = self.load_dataset(STATIC_FILES['bounds_bad_order'])
        results = self.cf.check_cell_boundaries(dataset)
        score, out_of, messages = self.get_results(results)
        # Boundary dimensions in the wrong order should not score any points
        assert (score, out_of) == (0, 2)

        # repr() of a unicode tuple yields "(u'nv', u'lat')" on Python 2 and
        # "('nv', 'lat')" on Python 3, matching how the message is built.
        tuple_format = repr((u'nv', u'lat'))
        assert u"Boundary variable coordinates are in improper order: {}. Bounds-specific dimensions should be last".format(tuple_format) in messages

        dataset = self.load_dataset(STATIC_FILES['bounds_bad_num_coords'])
        results = self.cf.check_cell_boundaries(dataset)
        score, out_of, messages = self.get_results(results)
        assert (score, out_of) == (0, 2)
        assert ('The number of dimensions of the variable lat is 1, but the number of dimensions of the boundary variable lat_bnds is 1. The boundary variable should have 2 dimensions' in messages)

        dataset = self.load_dataset(STATIC_FILES['1d_bound_bad'])
        results = self.cf.check_cell_boundaries(dataset)
        score, out_of, messages = self.get_results(results)
        tuple_format = repr((u'lon',))
        assert u"Boundary variable dimension lon_bnds must have at least 2 elements to form a simplex/closed cell with previous dimensions {}.".format(tuple_format) in messages

    def test_cell_measures(self):
        '''
        Test the cell_measures check against valid and malformed attributes
        '''
        dataset = self.load_dataset(STATIC_FILES['cell_measure'])
        results = self.cf.check_cell_measures(dataset)
        score, out_of, messages = self.get_results(results)
        assert score == out_of
        assert score > 0

        dataset = self.load_dataset(STATIC_FILES['bad_cell_measure1'])
        results = self.cf.check_cell_measures(dataset)
        score, out_of, messages = self.get_results(results)
        message = ("The cell_measures attribute for variable PS is formatted incorrectly. "
                   "It should take the form of either 'area: cell_var' or 'volume: cell_var' "
                   "where cell_var is the variable describing the cell measures")
        assert message in messages

        dataset = self.load_dataset(STATIC_FILES['bad_cell_measure2'])
        results = self.cf.check_cell_measures(dataset)
        score, out_of, messages = self.get_results(results)
        message = 'Cell measure variable PS referred to by box_area is not present in dataset variables'
        assert message in messages

    def test_climatology(self):
        '''
        Run the climatological statistics check on the climatology fixture
        '''
        # NOTE(review): no assertions are made on the outcome; this only
        # verifies that the check runs and its results can be tallied.
        dataset = self.load_dataset(STATIC_FILES['climatology'])
        results = self.cf.check_climatological_statistics(dataset)
        score, out_of, messages = self.get_results(results)

    def test_check_ancillary_variables(self):
        '''
        Test to ensure that ancillary variables are properly checked
        '''
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        results = self.cf.check_ancillary_variables(dataset)
        result_dict = {result.name: result for result in results}
        result = result_dict[u'§3.4 Ancillary Variables defined by temperature']
        assert result.value == (2, 2)

        dataset = self.load_dataset(STATIC_FILES['bad_reference'])
        results = self.cf.check_ancillary_variables(dataset)
        result_dict = {result.name: result for result in results}
        result = result_dict[u'§3.4 Ancillary Variables defined by temp']
        assert result.value == (1, 2)
        assert "temp_qc is not a variable in this dataset" == result.msgs[0]

    def test_download_standard_name_table(self):
        """
        Test that a user can download a specific standard name table
        """
        version = '35'

        data_directory = create_cached_data_dir()
        location = os.path.join(data_directory, 'cf-standard-name-table-test-{0}.xml'.format(version))
        download_cf_standard_name_table(version, location)
        # Register the cleanup before asserting so the downloaded file is
        # removed even when one of the assertions below fails.
        self.addCleanup(os.remove, location)

        # Test that the file now exists in location and is the right version
        self.assertTrue(os.path.isfile(location))
        std_names = StandardNameTable(location)
        self.assertEqual(std_names._version, version)

    def test_bad_standard_name_table(self):
        """
        Test that failure in case a bad standard name table is passed.
        """
        with pytest.raises(IOError):
            StandardNameTable('dummy_non_existent_file.ext')

    def test_check_flags(self):
        '''
        Test the flags check on valid and invalid flag variables
        '''
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        results = self.cf.check_flags(dataset)
        result_dict = {result.name: result for result in results}

        result = result_dict[u'§3.5 time_qc is a valid flags variable']
        assert result.value == (1, 1)

        result = result_dict[u'§3.5 flag_meanings for time_qc']
        assert result.value == (3, 3)

        result = result_dict[u'§3.5 flag_values for time_qc']
        assert result.value == (4, 4)

        # lat(time);
        #   lat:flag_meanings = "";
        result = result_dict[u'§3.5 lat is a valid flags variable']
        assert result.value == (0, 1)

        result = result_dict[u'§3.5 flag_meanings for lat']
        assert result.value == (2, 3)
        assert "flag_meanings can't be empty" == result.msgs[0]

    def test_check_flag_masks(self):
        '''
        Test the flags check on a dataset expected to score full marks
        '''
        dataset = self.load_dataset(STATIC_FILES['ghrsst'])
        results = self.cf.check_flags(dataset)
        scored, out_of, messages = self.get_results(results)
        # This is an example of a perfect dataset for flags
        assert scored > 0
        assert scored == out_of

    def test_check_bad_units(self):
        '''
        Test the units check on valid, inappropriate and deprecated units
        '''
        dataset = self.load_dataset(STATIC_FILES['2dim'])
        results = self.cf.check_units(dataset)
        for result in results:
            self.assert_result_is_good(result)

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_units(dataset)
        result_dict = {result.name: result for result in results}

        # it's Degrees_E which is a valid udunits. The preferred units are
        # degrees_east and they are checked in the check_longitude check
        result = result_dict[u'§3.1 Variable longitude\'s units are contained in UDUnits']
        assert result.value == (1, 1)

        result = result_dict[u'§3.1 Variable temp contains valid CF units']
        assert result.value == (3, 3)

        result = result_dict[u'§3.1 Variable temp\'s units are contained in UDUnits']
        assert result.value == (1, 1)

        dataset = self.load_dataset(STATIC_FILES['bad_units'])
        results = self.cf.check_units(dataset)
        result_dict = {result.name: result for result in results}

        # time(time)
        #   time:units = "s"
        result = result_dict[u'§3.1 Variable time contains valid CF units']
        # They are valid and even valid UDUnits
        assert result.value == (3, 3)
        result = result_dict[u"§3.1 Variable time's units are contained in UDUnits"]
        assert result.value == (1, 1)

        # But they are not appropriate for time
        result = result_dict[u"§3.1 Variable time's units are appropriate for the standard_name time"]
        assert result.value == (0, 1)

        # lat;
        #   lat:units = "degrees_E";
        # These units are not appropriate for a latitude
        result = result_dict[u"§3.1 Variable lat's units are appropriate for the standard_name latitude"]
        assert result.value == (0, 1)

        # lev;
        #   lev:units = "level";
        # level is deprecated
        result = result_dict[u"§3.1 Variable lev contains valid CF units"]
        assert result.value == (2, 3)
        assert 'units for lev, "level" are deprecated by CF 1.6' in result.msgs

        # temp_count(time);
        #   temp_count:standard_name = "atmospheric_temperature number_of_observations";
        #   temp_count:units = "1";
        result = result_dict[u"§3.1 Variable temp_count's units are appropriate for "
                             u"the standard_name atmospheric_temperature number_of_observations"]
        assert result.value == (1, 1)

    def test_latitude(self):
        '''
        Section 4.1 Latitude Coordinate
        '''
        # Check compliance
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_latitude(dataset)
        score, out_of, messages = self.get_results(results)
        assert (score, out_of) == (3, 3)

        # Verify non-compliance
        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_latitude(dataset)
        result_dict = {result.name: result for result in results}
        scored, out_of, messages = self.get_results(results)

        result = result_dict[u'§4.1 Latitude variable lat has required units attribute']
        assert result.value == (0, 1)
        assert result.msgs[0] == "latitude variable 'lat' must define units"

        result = result_dict[u'§4.1 Latitude variable lat uses recommended units']
        assert result.value == (0, 1)

        result = result_dict[u'§4.1 Latitude variable lat defines units using degrees_north']
        # NOTE(review): this only checks the truthiness of the result object,
        # not its value - confirm that is the intent.
        assert result
        assert result.msgs[0] == "CF recommends latitude variable 'lat' to use units degrees_north"

        result = result_dict[u'§4.1 Latitude variable lat defines either standard_name or axis']
        assert result.value == (1, 1)

        result = result_dict[u'§4.1 Latitude variable lat_uv has required units attribute']
        assert result.value == (1, 1)

        result = result_dict[u'§4.1 Latitude variable lat_uv uses recommended units']
        assert result.value == (1, 1)

        result = result_dict[u'§4.1 Latitude variable lat_uv defines units using degrees_north']
        assert result
        assert result.msgs[0] == "CF recommends latitude variable 'lat_uv' to use units degrees_north"

        result = result_dict[u'§4.1 Latitude variable lat_uv defines either standard_name or axis']
        assert result.value == (1, 1)

        assert (scored, out_of) == (9, 12)

        dataset = self.load_dataset(STATIC_FILES['rotated_pole_grid'])
        results = self.cf.check_latitude(dataset)
        scored, out_of, messages = self.get_results(results)
        assert (scored, out_of) == (6, 6)

        # hack to avoid writing to read-only file
        dataset.variables['rlat'] = MockVariable(dataset.variables['rlat'])
        rlat = dataset.variables['rlat']
        rlat.name = 'rlat'
        # test with a bad value
        rlat.units = 'degrees_north'
        results = self.cf.check_latitude(dataset)
        scored, out_of, messages = self.get_results(results)
        wrong_format = "Grid latitude variable '{}' should use degree equivalent units without east or north components. Current units are {}"
        self.assertTrue(wrong_format.format(rlat.name, rlat.units) in messages)

        rlat.units = 'radians'
        results = self.cf.check_latitude(dataset)
        scored, out_of, messages = self.get_results(results)
        self.assertTrue(wrong_format.format(rlat.name, rlat.units) in messages)

    def test_longitude(self):
        '''
        Section 4.2 Longitude Coordinate
        '''
        # Check compliance
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_longitude(dataset)
        score, out_of, messages = self.get_results(results)
        assert (score, out_of) == (3, 3)

        # Verify non-compliance
        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_longitude(dataset)
        result_dict = {result.name: result for result in results}
        scored, out_of, messages = self.get_results(results)

        result = result_dict[u'§4.1 Longitude variable lon has required units attribute']
        assert result.value == (0, 1)
        assert result.msgs[0] == "longitude variable 'lon' must define units"

        result = result_dict[u'§4.1 Longitude variable lon uses recommended units']
        assert result.value == (0, 1)

        result = result_dict[u'§4.1 Longitude variable lon defines units using degrees_east']
        # NOTE(review): this only checks the truthiness of the result object,
        # not its value - confirm that is the intent.
        assert result
        assert result.msgs[0] == "CF recommends longitude variable 'lon' to use units degrees_east"

        result = result_dict[u'§4.1 Longitude variable lon defines either standard_name or axis']
        assert result.value == (1, 1)

        result = result_dict[u'§4.1 Longitude variable lon_uv has required units attribute']
        assert result.value == (1, 1)

        result = result_dict[u'§4.1 Longitude variable lon_uv uses recommended units']
        assert result.value == (1, 1)

        result = result_dict[u'§4.1 Longitude variable lon_uv defines units using degrees_east']
        assert result
        assert result.msgs[0] == "CF recommends longitude variable 'lon_uv' to use units degrees_east"

        result = result_dict[u'§4.1 Longitude variable lon_uv defines either standard_name or axis']
        assert result.value == (1, 1)

        assert (scored, out_of) == (9, 12)

        dataset = self.load_dataset(STATIC_FILES['rotated_pole_grid'])
        # NOTE(review): this calls check_latitude inside the longitude test,
        # which looks like a copy-paste slip. Confirm the expected score
        # before switching it to check_longitude.
        results = self.cf.check_latitude(dataset)
        scored, out_of, messages = self.get_results(results)
        assert (scored, out_of) == (6, 6)

        # hack to avoid writing to read-only file
        dataset.variables['rlon'] = MockVariable(dataset.variables['rlon'])
        rlon = dataset.variables['rlon']
        rlon.name = 'rlon'
        # test with a bad value
        rlon.units = 'degrees_east'
        results = self.cf.check_longitude(dataset)
        scored, out_of, messages = self.get_results(results)
        wrong_format = "Grid longitude variable '{}' should use degree equivalent units without east or north components. Current units are {}"
        self.assertTrue(wrong_format.format(rlon.name, rlon.units) in messages)

        rlon.units = 'radians'
        results = self.cf.check_longitude(dataset)
        scored, out_of, messages = self.get_results(results)
        self.assertTrue(wrong_format.format(rlon.name, rlon.units) in messages)

    def test_is_vertical_coordinate(self):
        '''
        Section 4.3 Qualifiers for Vertical Coordinate

        NOTE: The standard doesn't explicitly say that vertical coordinates must be a
        coordinate type.
        '''
        # Make something that I can attach attrs to
        mock_variable = MockVariable

        # Proper name/standard_name
        known_name = mock_variable()
        known_name.standard_name = 'depth'
        self.assertTrue(is_vertical_coordinate('not_known', known_name))

        # Proper Axis
        axis_set = mock_variable()
        axis_set.axis = 'Z'
        self.assertTrue(is_vertical_coordinate('not_known', axis_set))

        # Proper units
        units_set = mock_variable()
        units_set.units = 'dbar'
        self.assertTrue(is_vertical_coordinate('not_known', units_set))

        # Proper units/positive
        positive = mock_variable()
        positive.units = 'm'
        positive.positive = 'up'
        self.assertTrue(is_vertical_coordinate('not_known', positive))

    def test_vertical_dimension(self):
        '''
        Section 4.3.1 Dimensional Vertical Coordinate
        '''
        # Check for compliance
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_dimensional_vertical_coordinate(dataset)
        assert len(results) == 1
        assert results[0].name == u'§4.3.1 height is a valid vertical coordinate'
        assert results[0].value == (2, 2)

        dataset = self.load_dataset(STATIC_FILES['illegal-vertical'])
        results = self.cf.check_dimensional_vertical_coordinate(dataset)
        assert len(results) == 1
        assert results[0].name == u'§4.3.1 z is a valid vertical coordinate'
        assert results[0].value == (0, 2)
        assert results[0].msgs[0] == 'units must be defined for vertical coordinates, there is no default'
        assert results[0].msgs[1] == ("vertical coordinates not defining pressure must include a positive attribute that "
                                      "is either 'up' or 'down'")

    def test_appendix_d(self):
        '''
        CF 1.6
        Appendix D
        The definitions given here allow an application to compute dimensional
        coordinate values from the dimensionless ones and associated variables.
        The formulas are expressed for a gridpoint (n,k,j,i) where i and j are
        the horizontal indices, k is the vertical index and n is the time index.
        A coordinate variable is associated with its definition by the value of
        the standard_name attribute. The terms in the definition are associated
        with file variables by the formula_terms attribute. The formula_terms
        attribute takes a string value, the string being comprised of
        blank-separated elements of the form "term: variable", where term is a
        keyword that represents one of the terms in the definition, and variable
        is the name of the variable in a netCDF file that contains the values
        for that term. The order of elements is not significant.
        '''
        # For each of the listed dimensionless vertical coordinates,
        # verify that the formula_terms match the provided set of terms
        self.assertTrue(no_missing_terms('atmosphere_ln_pressure_coordinate',
                                         {"p0", "lev"}))
        self.assertTrue(no_missing_terms('atmosphere_sigma_coordinate',
                                         {"sigma", "ps", "ptop"}))
        self.assertTrue(no_missing_terms('atmosphere_hybrid_sigma_pressure_coordinate',
                                         {'a', 'b', 'ps'}))
        # test alternative terms for
        # 'atmosphere_hybrid_sigma_pressure_coordinate'
        self.assertTrue(no_missing_terms('atmosphere_hybrid_sigma_pressure_coordinate',
                                         {'ap', 'b', 'ps'}))
        # check that an invalid set of terms fails
        self.assertFalse(no_missing_terms('atmosphere_hybrid_sigma_pressure_coordinate',
                                          {'a', 'b', 'p'}))
        self.assertTrue(no_missing_terms('atmosphere_hybrid_height_coordinate',
                                         {"a", "b", "orog"}))
        # missing terms should cause failure
        self.assertFalse(no_missing_terms('atmosphere_hybrid_height_coordinate',
                                          {"a", "b"}))
        # excess terms should cause failure
        self.assertFalse(no_missing_terms('atmosphere_hybrid_height_coordinate',
                                          {"a", "b", "c", "orog"}))
        self.assertTrue(no_missing_terms('atmosphere_sleve_coordinate',
                                         {"a", "b1", "b2", "ztop", "zsurf1", "zsurf2"}))
        self.assertTrue(no_missing_terms('ocean_sigma_coordinate',
                                         {"sigma", "eta", "depth"}))
        self.assertTrue(no_missing_terms('ocean_s_coordinate',
                                         {"s", "eta", "depth", "a", "b", "depth_c"}))
        self.assertTrue(no_missing_terms('ocean_sigma_z_coordinate',
                                         {"sigma", "eta", "depth", "depth_c", "nsigma", "zlev"}))
        self.assertTrue(no_missing_terms('ocean_double_sigma_coordinate',
                                         {"sigma", "depth", "z1", "z2", "a", "href", "k_c"}))

    def test_dimensionless_vertical(self):
        '''
        Section 4.3.2
        '''
        # Check affirmative compliance
        dataset = self.load_dataset(STATIC_FILES['dimensionless'])
        results = self.cf.check_dimensionless_vertical_coordinate(dataset)
        result_dict = {result.name: result for result in results}

        result = result_dict[u'§4.3.2 lev does not contain deprecated units']
        assert result.value[0] == result.value[1]

        result = result_dict[u'§4.3.2 lev has valid formula_terms']
        assert result.value[0] == result.value[1]

        # Check negative compliance
        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_dimensionless_vertical_coordinate(dataset)
        result_dict = {result.name: result for result in results}

        result = result_dict[u'§4.3.2 lev1 does not contain deprecated units']
        assert result.value == (1, 1)

        result = result_dict[u'§4.3.2 lev1 has valid formula_terms']
        assert result.value == (0, 1)
        assert result.msgs[0] == u'formula_terms is a required attribute and must be a non-empty string'

        result = result_dict[u'§4.3.2 lev2 has valid formula_terms']
        assert result.value == (4, 5)
        err_str = "The following variable(s) referenced in formula_terms are not present in the dataset variables: var1, var2, var3"
        self.assertTrue(err_str in result.msgs)

        # test with an invalid formula_terms
        dataset.variables['lev2'] = MockVariable(dataset.variables['lev2'])
        lev2 = dataset.variables['lev2']
        lev2.formula_terms = 'a: var1 b:var2 orog:'

        # create a malformed formula_terms attribute and check that it fails
        results = self.cf.check_dimensionless_vertical_coordinate(dataset)
        result_dict = {result.name: result for result in results}
        result = result_dict[u'§4.3.2 lev2 has valid formula_terms']
        self.assertTrue('Attribute formula_terms is not well-formed' in result.msgs)

    def test_is_time_variable(self):
        '''
        Test is_time_variable against standard_name, name, axis and units
        '''
        var1 = MockVariable()
        var1.standard_name = 'time'
        self.assertTrue(is_time_variable('not_time', var1))

        var2 = MockVariable()
        self.assertTrue(is_time_variable('time', var2))

        self.assertFalse(is_time_variable('not_time', var2))

        var3 = MockVariable()
        var3.axis = 'T'
        self.assertTrue(is_time_variable('maybe_time', var3))

        var4 = MockVariable()
        var4.units = 'seconds since 1900-01-01'
        self.assertTrue(is_time_variable('maybe_time', var4))

    def test_check_time_coordinate(self):
        '''
        Test the time coordinate check on compliant and non-compliant data
        '''
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_time_coordinate(dataset)
        for r in results:
            self.assertTrue(r.value)

        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_time_coordinate(dataset)
        scored, out_of, messages = self.get_results(results)

        assert 'time does not have correct time units' in messages
        assert (scored, out_of) == (1, 2)

    def test_check_calendar(self):
        '''
        Test the calendar check on compliant and non-compliant data
        '''
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_calendar(dataset)
        for r in results:
            self.assertTrue(r.value)

        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_calendar(dataset)
        scored, out_of, messages = self.get_results(results)

        assert "Variable time should have a valid calendar: 'nope' is not a valid calendar" in messages

    def test_check_aux_coordinates(self):
        '''
        Test that auxiliary coordinates are checked against their variable's dimensions
        '''
        dataset = self.load_dataset(STATIC_FILES['illegal-aux-coords'])
        results = self.cf.check_aux_coordinates(dataset)
        result_dict = {result.name: result for result in results}

        result = result_dict[u"§5.0 Auxiliary Coordinates of h_temp must have a subset of h_temp's dimensions"]
        assert result.value == (2, 4)
        # The dimension names may be reported in either order
        regx = (r"dimensions for auxiliary coordinate variable lat \([xy]c, [xy]c\) are not a subset of dimensions for variable "
                r"h_temp \(xc\)")
        assert re.match(regx, result.msgs[0]) is not None

        result = result_dict[u"§5.0 Auxiliary Coordinates of sal must have a subset of sal's dimensions"]
        assert result.value == (4, 4)

    def test_check_grid_coordinates(self):
        '''
        Test the grid coordinates check on a compliant two dimensional grid
        '''
        dataset = self.load_dataset(STATIC_FILES['2dim'])
        results = self.cf.check_grid_coordinates(dataset)
        scored, out_of, messages = self.get_results(results)

        result_dict = {result.name: result for result in results}
        result = result_dict[u'§5.6 Grid Feature T is associated with true latitude and true longitude']
        assert result.value == (2, 2)
        assert (scored, out_of) == (2, 2)

    def test_check_two_dimensional(self):
        '''
        Test the grid coordinates check on good and bad two dimensional grids
        '''
        dataset = self.load_dataset(STATIC_FILES['2dim'])
        results = self.cf.check_grid_coordinates(dataset)
        for r in results:
            self.assertTrue(r.value)

        # Need the bad testing
        dataset = self.load_dataset(STATIC_FILES['bad2dim'])
        results = self.cf.check_grid_coordinates(dataset)
        scored, out_of, messages = self.get_results(results)

        result_dict = {result.name: result for result in results}
        # Missing association
        result = result_dict[u'§5.6 Grid Feature T is associated with true latitude and true longitude']
        assert result.msgs[0] == 'T is not associated with a coordinate defining true latitude and sharing a subset of dimensions'

        # Dimensions aren't a subset of the variables'
        result = result_dict[u'§5.6 Grid Feature C is associated with true latitude and true longitude']
        assert result.msgs[0] == 'C is not associated with a coordinate defining true latitude and sharing a subset of dimensions'

    def test_check_reduced_horizontal_grid(self):
        '''
        Test the reduced horizontal grid check on valid and invalid grids
        '''
        dataset = self.load_dataset(STATIC_FILES['rhgrid'])
        results = self.cf.check_reduced_horizontal_grid(dataset)
        result_dict = {result.name: result for result in results}
        result = result_dict[u'§5.3 PS is a valid reduced horizontal grid']
        assert result.value == (7, 7)

        dataset = self.load_dataset(STATIC_FILES['bad-rhgrid'])
        results = self.cf.check_reduced_horizontal_grid(dataset)
        result_dict = {result.name: result for result in results}

        result = result_dict[u'§5.3 PSa is a valid reduced horizontal grid']
        assert result.value == (6, 7)
        assert result.msgs[0] == "PSa must be associated with a valid longitude coordinate"

        result = result_dict[u'§5.3 PSb is a valid reduced horizontal grid']
        # The dimensions don't line up but another §5.0 check catches it.
        assert result.value == (7, 7)

    def test_check_grid_mapping(self):
        '''
        Test the grid mapping check on grid mapping variables and their users
        '''
        dataset = self.load_dataset(STATIC_FILES['mapping'])
        results = self.cf.check_grid_mapping(dataset)
        result_dict = {result.name: result for result in results}

        result = result_dict[u'§5.6 Grid Mapping Variable epsg must define a valid grid mapping']
        assert result.value == (7, 8)
        assert result.msgs[0] == 'false_easting is a required attribute for grid mapping stereographic'

        result = result_dict[u'§5.6 Grid Mapping Variable wgs84 must define a valid grid mapping']
        assert result.value == (3, 3)

        result = result_dict[u'§5.6 Variable lat defining a grid mapping has valid grid_mapping attribute']
        assert result.value == (2, 2)

    def test_check_geographic_region(self):
        '''
        Test the geographic region check on valid and invalid region names
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_region'])
        results = self.cf.check_geographic_region(dataset)
        result_dict = {result.name: result for result in results}

        result = result_dict[u'§6.1.1 Geographic region specified by neverland is valid']
        assert result.value == (0, 1)

        result = result_dict[u'§6.1.1 Geographic region specified by geo_region is valid']
        assert result.value == (1, 1)

    def test_check_packed_data(self):
        '''
        Test the packed data check; the expected outcomes are positional
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_packed_data(dataset)
        self.assertEqual(len(results), 4)
        self.assertFalse(results[0].value)
        self.assertTrue(results[1].value)
        self.assertTrue(results[2].value)
        self.assertFalse(results[3].value)

    def test_compress_packed(self):
        """Tests compressed indexed coordinates"""
        dataset = self.load_dataset(STATIC_FILES['reduced_horizontal_grid'])
        results = self.cf.check_compression_gathering(dataset)
        self.assertTrue(results[0].value)

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_compression_gathering(dataset)
        self.assertFalse(results[0].value)
        self.assertFalse(results[1].value)

    def test_check_all_features_are_same_type(self):
        '''
        Run the feature type consistency check on two datasets
        '''
        # NOTE(review): both assertions only check the truthiness of the
        # returned object - confirm that is the intent.
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_all_features_are_same_type(dataset)
        assert result

        dataset = self.load_dataset(STATIC_FILES['featureType'])
        result = self.cf.check_all_features_are_same_type(dataset)
        assert result

    def test_check_units(self):
        '''
        Ensure that container variables are not checked for units but geophysical variables are
        '''
        dataset = self.load_dataset(STATIC_FILES['units_check'])
        results = self.cf.check_units(dataset)

        # We don't keep track of the variables names for checks that passed, so
        # we can make a strict assertion about how many checks were performed
        # and if there were errors, which there shouldn't be.
        scored, out_of, messages = self.get_results(results)
        assert scored == 20
        assert out_of == 20
        assert messages == []

    def test_check_duplicates(self):
        '''
        Test to verify that the check identifies duplicate axes
        '''
        dataset = self.load_dataset(STATIC_FILES['duplicate_axis'])
        results = self.cf.check_duplicate_axis(dataset)
        result_dict = {result.name: result for result in results}
        result = result_dict[u'§5.0 Variable temp does not contain duplicate coordinates']
        assert result.msgs[0] == 'duplicate axis X defined by lon_rho'

    def test_check_multi_dimensional_coords(self):
        '''
        Test to verify that multi dimensional coordinates are checked for
        sharing names with dimensions
        '''
        dataset = self.load_dataset(STATIC_FILES['multi-dim-coordinates'])
        results = self.cf.check_multi_dimensional_coords(dataset)
        scored, out_of, messages = self.get_results(results)

        result_dict = {result.name: result for result in results}
        result = result_dict[u'§5.0 multidimensional coordinate xlon should not have the same name as dimension']
        assert result.msgs[0] == 'xlon shares the same name as one of its dimensions'

        result = result_dict[u'§5.0 multidimensional coordinate xlat should not have the same name as dimension']
        assert result.msgs[0] == 'xlat shares the same name as one of its dimensions'

        assert (scored, out_of) == (2, 4)

    def test_64bit(self):
        dataset = self.load_dataset(STATIC_FILES['ints64'])
suite = CheckSuite() suite.checkers = { 'cf' : CFBaseCheck } suite.run(dataset, 'cf') def test_variable_feature_check(self): dataset = self.load_dataset(STATIC_FILES['bad-trajectory']) results = self.cf.check_variable_features(dataset) scored, out_of, messages = self.get_results(results) result_dict = {result.name: result for result in results} result = result_dict[u'§9.1 Feature Type for temperature is valid trajectory'] assert result.msgs[0] == 'temperature is not a trajectory, it is detected as a point' dataset = self.load_dataset(STATIC_FILES['trajectory-complete']) results = self.cf.check_variable_features(dataset) scored, out_of, messages = self.get_results(results) assert scored == out_of dataset = self.load_dataset(STATIC_FILES['trajectory-implied']) results = self.cf.check_variable_features(dataset) scored, out_of, messages = self.get_results(results) assert scored == out_of def test_check_cell_methods(self): dataset = self.load_dataset(STATIC_FILES['climatology']) results = self.cf.check_cell_methods(dataset) scored, out_of, messages = self.get_results(results) result_dict = {result.name: result for result in results} result = result_dict[u'§7.1 temperature has a valid cell_methods attribute format'] assert result result = result_dict[u'§7.3 temperature has valid methods in cell_methods attribute'] assert result result = result_dict[u'§7.3 temperature has valid names in cell_methods attribute'] assert result nc_obj = MockTimeSeries() nc_obj.createVariable('temperature', 'd', ('time',)) temp = nc_obj.variables['temperature'] temp.cell_methods = 'lat: lon: mean depth: mean (interval: 20 meters)' results = self.cf.check_cell_methods(nc_obj) scored, out_of, messages = self.get_results(results) result_dict = {result.name: result for result in results} modifier_results = result_dict[u'§7.3.3 temperature has valid cell_methods modifiers'] self.assertTrue(modifier_results.value == (3, 3)) # modify the cell methods to something invalid temp.cell_methods = 'lat: 
lon: mean depth: mean (interval: x whizbangs)' results = self.cf.check_cell_methods(nc_obj) scored, out_of, messages = self.get_results(results) result_dict = {result.name: result for result in results} modifier_results = result_dict[u'§7.3.3 temperature has valid cell_methods modifiers'] self.assertFalse(modifier_results.value == (3, 3)) self.assertTrue('temperature:cell_methods contains an interval value that does not parse as a numeric value: "x".' in messages) self.assertTrue('temperature:cell_methods interval units "whizbangs" is not parsable by UDUNITS.' in messages) temp.cell_methods = 'lat: lon: mean depth: mean (comment: should not go here interval: 2.5 m)' results = self.cf.check_cell_methods(nc_obj) self.assertTrue('The non-standard "comment:" element must come after any standard elements in cell_methods for variable temperature') # standalone comments require no keyword temp.cell_methods = 'lon: mean (This is a standalone comment)' results = self.cf.check_cell_methods(nc_obj) result_dict = {result.name: result for result in results} modifier_results = result_dict[u'§7.3.3 temperature has valid cell_methods modifiers'] self.assertTrue(modifier_results.value == (1, 1)) temp.cell_methods = 'lat: lon: mean depth: mean (invalid_keyword: this is invalid)' results = self.cf.check_cell_methods(nc_obj) self.assertTrue('Invalid cell_methods keyword "invalid_keyword" for variable temperature. 
Must be one of [interval, comment]') temp.cell_methods = 'lat: lon: mean depth: mean (interval: 0.2 m comment: This should come last interval: 0.01 degrees)' results = self.cf.check_cell_methods(nc_obj) self.assertTrue('The non-standard "comment:" element must come after any standard elements in cell_methods for variable temperature') temp.cell_methods = 'lat: lon: mean depth: mean (interval 0.2 m interval: 0.01 degrees)' results = self.cf.check_cell_methods(nc_obj) self.assertTrue('Parenthetical content inside cell_methods is not well formed: interval 0.2 m interval: 0.01 degrees') # -------------------------------------------------------------------------------- # Utility Method Tests # -------------------------------------------------------------------------------- def test_temporal_unit_conversion(self): self.assertTrue(units_convertible('hours', 'seconds')) self.assertFalse(units_convertible('hours', 'hours since 2000-01-01')) def test_units_temporal(self): self.assertTrue(units_temporal('hours since 2000-01-01')) self.assertFalse(units_temporal('hours')) self.assertFalse(units_temporal('days since the big bang'))
class TestCF(unittest.TestCase):
    '''
    Tests for the CF checker (``CFBaseCheck``) and a few CF utility functions.

    Each compliance test loads one or more netCDF fixtures listed in
    STATIC_FILES, runs a single ``check_*`` method of the checker and pins
    the returned scores and/or messages.
    '''

    # @see
    # http://www.saltycrane.com/blog/2012/07/how-prevent-nose-unittest-using-docstring-when-verbosity-2/
    def shortDescription(self):
        '''Return None so runners show the test id instead of the docstring.'''
        return None

    # override __str__ and __repr__ behavior to show a copy-pastable nosetest name for ion tests
    #  ion.module:TestClassName.test_function_name
    def __repr__(self):
        '''
        Format the test id as ``name (dotted.path)``; ids rooted at "ion" or
        "pyon" are formatted as ``name ( module.path:Class.name )``.
        '''
        name = self.id()
        name = name.split('.')
        if name[0] not in ["ion", "pyon"]:
            return "%s (%s)" % (name[-1], '.'.join(name[:-1]))
        else:
            return "%s ( %s )" % (name[-1], '.'.join(name[:-2]) + ":" + '.'.join(name[-2:]))
    __str__ = __repr__

    def setUp(self):
        '''
        Initialize the dataset
        '''
        self.cf = CFBaseCheck()

    # --------------------------------------------------------------------------------
    # Helper Methods
    # --------------------------------------------------------------------------------

    def new_nc_file(self):
        '''
        Make a new temporary netCDF file for the scope of the test

        Raises IOError if 'example.nc' already exists in the temp directory.
        '''
        nc_file_path = os.path.join(gettempdir(), 'example.nc')
        if os.path.exists(nc_file_path):
            raise IOError('File Exists: %s' % nc_file_path)
        nc = Dataset(nc_file_path, 'w')
        # Cleanups run in reverse registration order: the dataset is closed
        # first, then the file is removed.
        self.addCleanup(os.remove, nc_file_path)
        self.addCleanup(nc.close)
        return nc

    def load_dataset(self, nc_dataset):
        '''
        Return a loaded NC Dataset for the given path

        The dataset is opened read-only and closed automatically when the
        test finishes. Raises ValueError if nc_dataset is not a str.
        '''
        if not isinstance(nc_dataset, str):
            raise ValueError("nc_dataset should be a string")

        nc_dataset = Dataset(nc_dataset, 'r')
        self.addCleanup(nc_dataset.close)
        return nc_dataset

    def get_results(self, results):
        '''
        Returns a tuple of the value scored, possible, and a list of messages
        in the result set.

        A result whose value is a tuple contributes (value[0], value[1]); any
        other value contributes int(value) out of 1.
        '''
        out_of = 0
        scored = 0
        for r in results:
            if isinstance(r.value, tuple):
                out_of += r.value[1]
                scored += r.value[0]
            else:
                out_of += 1
                scored += int(r.value)

        # Store the messages
        messages = []
        for r in results:
            messages.extend(r.msgs)

        return scored, out_of, messages

    # --------------------------------------------------------------------------------
    # Compliance Tests
    # --------------------------------------------------------------------------------

    def test_check_data_types(self):
        """
        2.2 The netCDF data types char, byte, short, int, float or real, and double are all acceptable
        """
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_data_types(dataset)
        self.assertTrue(result.value)

        dpair = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_data_types(dpair)
        assert result.value == (5, 6)

    def test_naming_conventions(self):
        '''
        Section 2.3 Naming Conventions

        Variable, dimension and attribute names should begin with a letter and be composed of letters, digits, and underscores.
        '''
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_naming_conventions(dataset)
        num_var = len(dataset.variables)

        expected = (num_var,) * 2
        self.assertEqual(result.value, expected)

        dataset = self.load_dataset(STATIC_FILES['bad'])
        result = self.cf.check_naming_conventions(dataset)
        num_var = len(dataset.variables)
        # Exactly one name in the 'bad' fixture is expected to fail.
        expected = (num_var - 1, num_var)
        self.assertEqual(result.value, expected)
        assert '_poor_dim' in result.msgs[0]

    def test_check_names_unique(self):
        """
        2.3 names should not be distinguished purely by case, i.e., if case is disregarded, no two names should be the same.
        """
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_names_unique(dataset)

        num_var = len(dataset.variables)
        expected = (num_var,) * 2

        self.assertEqual(result.value, expected)

        # TODO: Add bad unique names to bad.nc

    def test_check_dimension_names(self):
        """
        2.4 A variable may have any number of dimensions, including zero, and the dimensions must all have different names.
        """
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_dimension_names(dataset)
        assert result.value == (5, 6)

    def test_check_dimension_order(self):
        """
        2.4 If any or all of the dimensions of a variable have the interpretations of "date or time" (T), "height or depth" (Z),
        "latitude" (Y), or "longitude" (X) then we recommend, those dimensions to appear in the relative order T, then Z, then Y,
        then X in the CDL definition corresponding to the file. All other dimensions should, whenever possible, be placed to the
        left of the spatiotemporal dimensions.
        """
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_dimension_order(dataset)
        assert result.value == (11, 12)

    def test_check_fill_value_outside_valid_range(self):
        """
        2.5.1 The _FillValue should be outside the range specified by valid_range (if used) for a variable.
        """
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_fill_value_outside_valid_range(dataset)
        assert sum((result.value for result in results)) == 1
        assert len(results) == 2

    def test_check_conventions_are_cf_16(self):
        """
        2.6.1 the NUG defined global attribute Conventions to the string value "CF-1.6"
        """
        # :Conventions = "CF-1.6"
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_conventions_are_cf_16(dataset)
        self.assertTrue(result.value)

        # :Conventions = "CF-1.6 ,ACDD" ;
        dataset = self.load_dataset(STATIC_FILES['conv_multi'])
        result = self.cf.check_conventions_are_cf_16(dataset)
        self.assertTrue(result.value)

        # :Conventions = "NoConvention"
        dataset = self.load_dataset(STATIC_FILES['conv_bad'])
        result = self.cf.check_conventions_are_cf_16(dataset)
        self.assertFalse(result.value)

    def test_check_convention_globals(self):
        """
        2.6.2 title/history global attributes, must be strings. Do not need to exist.
        """
        # check for pass
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_convention_globals(dataset)
        for each in result:
            self.assertTrue(each.value)

        # check if it doesn't exist that we pass
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_convention_globals(dataset)
        for each in result:
            self.assertTrue(each.value)

    def test_check_convention_possibly_var_attrs(self):
        """
        3.1 The units attribute is required for all variables that represent dimensional quantities
        (except for boundary variables defined in Section 7.1, "Cell Boundaries" and climatology variables
        defined in Section 7.4, "Climatological Statistics").

        Units are not required for dimensionless quantities. A variable with no units attribute is assumed
        to be dimensionless. However, a units attribute specifying a dimensionless unit may optionally be
        included.

        - units required
        - type must be recognized by udunits
        - if std name specified, must be consistent with standard name table, must also be consistent with a
          specified cell_methods attribute if present
        """
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        result = self.cf.check_convention_possibly_var_attrs(dataset)
        for each in result:
            self.assertTrue(each.value)

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_convention_possibly_var_attrs(dataset)
        for each in result:
            self.assertFalse(each.value)

    def test_check_standard_name(self):
        """
        3.3 A standard name is associated with a variable via the attribute standard_name which takes a
        string value comprised of a standard name optionally followed by one or more blanks and a
        standard name modifier
        """
        dataset = self.load_dataset(STATIC_FILES['2dim'])
        result = self.cf.check_standard_name(dataset)
        for each in result:
            self.assertTrue(each.value)

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_standard_name(dataset)
        for each in result:
            self.assertFalse(each.value)

    def test_download_standard_name_table(self):
        """
        Test that a user can download a specific standard name table
        """
        version = '35'

        data_directory = create_cached_data_dir()
        location = os.path.join(data_directory, 'cf-standard-name-table-test-{0}.xml'.format(version))
        download_cf_standard_name_table(version, location)

        # Test that the file now exists in location and is the right version
        self.assertTrue(os.path.isfile(location))
        std_names = StandardNameTable(location)
        self.assertEqual(std_names._version, version)
        self.addCleanup(os.remove, location)

    def test_check_flags(self):
        '''
        Run check_flags on the 'self_referencing' fixture and pin the score
        and the number of occurrences of each failure message.
        '''
        dataset = self.load_dataset(STATIC_FILES['self_referencing'])
        results = self.cf.check_flags(dataset)
        scored, out_of, messages = self.get_results(results)

        self.assertEqual(scored, 46)
        self.assertEqual(out_of, 59)
        self.assertEqual(messages.count('flag_values must be a list'), 6)
        m_str = r"'flag_values' attribute for variable '\w+' does not have same type \(fv: [<>]?\w+, v: [<>]?\w+\)"
        # make sure flag_values attribute where not equal to variable type
        # has the proper message
        self.assertEqual(sum(bool(re.match(m_str, msg)) for msg in messages), 7)

    def test_check_bad_units(self):
        '''
        check_units must pass every result for '2dim' and fail every result
        for 'bad_data_type'.
        '''
        dataset = self.load_dataset(STATIC_FILES['2dim'])
        result = self.cf.check_units(dataset)
        for each in result:
            self.assertTrue(each.value)

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_units(dataset)
        for each in result:
            self.assertFalse(each.value)

    def test_coordinate_types(self):
        '''
        Section 4 Coordinate Types

        We strongly recommend that coordinate variables be used for all coordinate types whenever they are applicable.
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_coordinate_vars_for_all_coordinate_types(dataset)
        for each in result:
            self.assertTrue(each.value)

    def test_check_coordinate_axis_attr(self):
        '''
        check_coordinate_axis_attr must pass everything for '2dim'; for
        'bad_data_type' the expectation depends on the variable named in the
        result.
        '''
        dataset = self.load_dataset(STATIC_FILES['2dim'])
        result = self.cf.check_coordinate_axis_attr(dataset)
        for each in result:
            self.assertTrue(each.value)

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        result = self.cf.check_coordinate_axis_attr(dataset)
        for each in result:
            # each.name is indexed: [1] selects the variable, [2] the sub-check.
            if each.name[1] in ['time', 'latitude']:
                self.assertTrue(each.value)
            if each.name[1] in ['salinity']:
                if each.name[2] not in ['does_not_depend_on_mult_coord_vars']:
                    self.assertFalse(each.value)

    def test_latitude(self):
        '''
        Section 4.1 Latitude Coordinate
        '''
        # Check compliance
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_latitude(dataset)
        for r in results:
            if isinstance(r.value, tuple):
                self.assertEqual(r.value[0], r.value[1])
            else:
                self.assertTrue(r.value)

        # Verify non-compliance
        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_latitude(dataset)

        scored, out_of, messages = self.get_results(results)

        assert 'lat does not have units attribute' in messages
        assert 'lat_uv units are acceptable, but not recommended' in messages
        assert 'lat_like does not have units attribute' in messages

        assert scored == 5
        assert out_of == 12

    def test_longitude(self):
        '''
        Section 4.2 Longitude Coordinate
        '''
        # Check compliance
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_longitude(dataset)
        for r in results:
            if isinstance(r.value, tuple):
                self.assertEqual(r.value[0], r.value[1])
            else:
                self.assertTrue(r.value)

        # Verify non-compliance
        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_longitude(dataset)

        scored, out_of, messages = self.get_results(results)

        assert 'lon does not have units attribute' in messages
        assert 'lon_uv units are acceptable, but not recommended' in messages
        assert 'lon_like does not have units attribute' in messages

        assert scored == 5
        assert out_of == 12

    def test_is_vertical_coordinate(self):
        '''
        Section 4.3 Qualifiers for Vertical Coordinate

        NOTE: The standard doesn't explicitly say that vertical coordinates must be a
        coordinate type.
        '''
        # Make something that I can attach attrs to
        mock_variable = MockVariable

        # Proper name/standard_name
        known_name = mock_variable()
        known_name.standard_name = 'depth'
        self.assertTrue(is_vertical_coordinate('not_known', known_name))

        # Proper Axis
        axis_set = mock_variable()
        axis_set.axis = 'Z'
        self.assertTrue(is_vertical_coordinate('not_known', axis_set))

        # Proper units
        units_set = mock_variable()
        units_set.units = 'dbar'
        self.assertTrue(is_vertical_coordinate('not_known', units_set))

        # Proper units/positive
        positive = mock_variable()
        positive.units = 'm'
        positive.positive = 'up'
        self.assertTrue(is_vertical_coordinate('not_known', positive))

    def test_vertical_coordinate(self):
        '''
        Section 4.3 Vertical (Height or Depth) coordinate
        '''
        # Check compliance
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_vertical_coordinate(dataset)
        for r in results:
            self.assertTrue(r.value)

        # Check non-compliance
        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_vertical_coordinate(dataset)

        scored, out_of, messages = self.get_results(results)

        assert 'height does not have units' in messages
        # These two used to be `assert '<string>'`, which is always true.
        # NOTE(review): expected text is copied verbatim from the original
        # test; confirm it matches the messages the checker emits.
        assert 'vertical variable depth needs to define positive attribute' in messages
        assert 'vertical variable depth2 needs to define positive attribute' in messages

    def test_vertical_dimension(self):
        '''
        Section 4.3.1 Dimensional Vertical Coordinate
        '''
        # Check for compliance
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_dimensional_vertical_coordinate(dataset)
        for r in results:
            self.assertTrue(r.value)

        # Check for non-compliance
        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_dimensional_vertical_coordinate(dataset)
        for r in results:
            self.assertFalse(r.value)

    def test_appendix_d(self):
        '''
        CF 1.6
        Appendix D
        The definitions given here allow an application to compute dimensional
        coordinate values from the dimensionless ones and associated variables.
        The formulas are expressed for a gridpoint (n,k,j,i) where i and j are
        the horizontal indices, k is the vertical index and n is the time index.
        A coordinate variable is associated with its definition by the value of
        the standard_name attribute. The terms in the definition are associated
        with file variables by the formula_terms attribute. The formula_terms
        attribute takes a string value, the string being comprised of
        blank-separated elements of the form "term: variable", where term is a
        keyword that represents one of the terms in the definition, and variable
        is the name of the variable in a netCDF file that contains the values
        for that term. The order of elements is not significant.
        '''
        dimless = dict(dimless_vertical_coordinates)

        def verify(std_name, test_str):
            # The regex registered for std_name must match the formula_terms string.
            regex_matches = re.match(dimless[std_name], test_str)
            self.assertIsNotNone(regex_matches)

        # For each of the listed dimensionless vertical coordinates,
        # verify that the formula_terms match the provided regex
        verify('atmosphere_ln_pressure_coordinate',
               "p0: var1 lev: var2")
        verify('atmosphere_sigma_coordinate',
               "sigma: var1 ps: var2 ptop: var3")
        verify('atmosphere_hybrid_sigma_pressure_coordinate',
               "a: var1 b: var2 ps: var3 p0: var4")
        verify('atmosphere_hybrid_height_coordinate',
               "a: var1 b: var2 orog: var3")
        verify('atmosphere_sleve_coordinate',
               "a: var1 b1: var2 b2: var3 ztop: var4 zsurf1: var5 zsurf2: var6")
        verify('ocean_sigma_coordinate',
               "sigma: var1 eta: var2 depth: var3")
        verify('ocean_s_coordinate',
               "s: var1 eta: var2 depth: var3 a: var4 b: var5 depth_c: var6")
        verify('ocean_sigma_z_coordinate',
               "sigma: var1 eta: var2 depth: var3 depth_c: var4 nsigma: var5 zlev: var6")
        verify('ocean_double_sigma_coordinate',
               "sigma: var1 depth: var2 z1: var3 z2: var4 a: var5 href: var6 k_c: var7")

    def test_dimensionless_vertical(self):
        '''
        Section 4.3.2
        '''
        # Check affirmative compliance
        dataset = self.load_dataset(STATIC_FILES['dimensionless'])
        results = self.cf.check_dimensionless_vertical_coordinate(dataset)
        for r in results:
            self.assertTrue(r.value)

        # Check negative compliance
        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_dimensionless_vertical_coordinate(dataset)

        scored, out_of, messages = self.get_results(results)

        assert 'formula_terms missing from dimensionless coordinate lev1' in messages
        assert 'formula_terms not defined for dimensionless coordinate lev1' in messages
        assert 'var1 missing for dimensionless coordinate lev2' in messages
        assert 'var2 missing for dimensionless coordinate lev2' in messages
        assert 'var3 missing for dimensionless coordinate lev2' in messages
        assert scored == 1
        assert out_of == 4

    def test_is_time_variable(self):
        '''
        is_time_variable must accept a variable identified by standard_name,
        by the name 'time', by axis 'T' or by "since"-style units.
        '''
        var1 = MockVariable()
        var1.standard_name = 'time'
        self.assertTrue(is_time_variable('not_time', var1))

        var2 = MockVariable()
        self.assertTrue(is_time_variable('time', var2))
        self.assertFalse(is_time_variable('not_time', var2))

        var3 = MockVariable()
        var3.axis = 'T'
        self.assertTrue(is_time_variable('maybe_time', var3))

        var4 = MockVariable()
        var4.units = 'seconds since 1900-01-01'
        self.assertTrue(is_time_variable('maybe_time', var4))

    def test_check_time_coordinate(self):
        '''
        Run check_time_coordinate on 'example-grid' (all results truthy) and
        on 'bad' (pinned score and messages).
        '''
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_time_coordinate(dataset)
        for r in results:
            self.assertTrue(r.value)

        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_time_coordinate(dataset)

        scored, out_of, messages = self.get_results(results)

        assert 'bad_time_1 does not have units' in messages
        assert 'bad_time_2 doesn not have correct time units' in messages
        assert scored == 1
        assert out_of == 3

    def test_check_calendar(self):
        '''
        Run check_calendar on 'example-grid' (all results truthy) and on
        'bad' (missing and invalid calendar messages expected).
        '''
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_calendar(dataset)
        for r in results:
            self.assertTrue(r.value)

        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_calendar(dataset)
        scored, out_of, messages = self.get_results(results)

        assert 'Variable bad_time_1 should have a calendar attribute' in messages
        assert "Variable bad_time_2 should have a valid calendar: 'nope' is not a valid calendar" in messages

    def test_self_referencing(self):
        '''
        This test captures a check where a coordinate has circular references
        '''
        dataset = self.load_dataset(STATIC_FILES['self_referencing'])
        results = self.cf.check_two_dimensional(dataset)

        scored, out_of, messages = self.get_results(results)
        assert "Variable LATITUDE's coordinate references itself" in messages
        assert scored == 1
        assert out_of == 3

        dataset = self.load_dataset(STATIC_FILES['valid_coordinates'])
        results = self.cf.check_two_dimensional(dataset)
        scored, out_of, messages = self.get_results(results)
        assert out_of == 4
        assert scored == 4
        assert "Variable CD_310's coordinate references itself" not in messages

    def test_check_independent_axis_dimensions(self):
        '''
        Run check_independent_axis_dimensions on 'example-grid' (all results
        truthy) and on 'bad' (pinned score and messages).
        '''
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_independent_axis_dimensions(dataset)
        for r in results:
            self.assertTrue(r.value)

        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_independent_axis_dimensions(dataset)

        scored, out_of, messages = self.get_results(results)
        assert 'The lev dimension for the variable lev1 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \
            in messages
        assert 'The lev dimension for the variable lev2 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \
            in messages
        assert 'The time dimension for the variable bad_time_1 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \
            in messages
        assert 'The time dimension for the variable bad_time_2 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \
            in messages
        assert 'The time dimension for the variable column_temp does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \
            in messages
        assert scored == 6
        assert out_of == 11

    def test_check_two_dimensional(self):
        '''
        Run check_two_dimensional on '2dim' (all results truthy) and on
        'bad2dim' (pinned failure messages).
        '''
        dataset = self.load_dataset(STATIC_FILES['2dim'])
        results = self.cf.check_two_dimensional(dataset)
        for r in results:
            self.assertTrue(r.value)

        # Need the bad testing
        dataset = self.load_dataset(STATIC_FILES['bad2dim'])
        results = self.cf.check_two_dimensional(dataset)
        scored, out_of, messages = self.get_results(results)
        assert "Variable T's coordinate, lat, is not a coordinate or auxiliary variable" in messages
        assert "coordinate lat is not a correct lat/lon variable" in messages
        assert "Variable C's coordinate, lat_p, does not share dimension x with the variable" in messages

    def test_check_reduced_horizontal_grid(self):
        '''
        Run check_reduced_horizontal_grid on 'rhgrid' (PS passes) and on
        'bad-rhgrid' (every reported variable fails with a pinned message).
        '''
        dataset = self.load_dataset(STATIC_FILES['rhgrid'])
        results = self.cf.check_reduced_horizontal_grid(dataset)
        rd = { r.name[1] : r.value for r in results }
        self.assertTrue(rd['PS'])

        dataset = self.load_dataset(STATIC_FILES['bad-rhgrid'])
        results = self.cf.check_reduced_horizontal_grid(dataset)
        rd = { r.name[1] : (r.value, r.msgs) for r in results }

        for value, _msgs in rd.values():
            self.assertFalse(value)

        self.assertIn('Coordinate longitude is not a proper variable', rd['PSa'][1])
        self.assertIn("Coordinate latitude's dimension, latdim, is not a dimension of PSb", rd['PSb'][1])
        assert 'PSc' not in rd

    def test_check_horz_crs_grid_mappings_projections(self):
        '''
        Run check_horz_crs_grid_mappings_projections on 'mapping' and pin the
        scores of the wgs84 and epsg results.
        '''
        dataset = self.load_dataset(STATIC_FILES['mapping'])
        results = self.cf.check_horz_crs_grid_mappings_projections(dataset)
        rd = { r.name[1] : r.value for r in results }
        assert rd['wgs84'] == (3, 3)
        assert rd['epsg'] == (7, 8)

    def test_check_scalar_coordinate_system(self):
        '''
        Run check_scalar_coordinate_system on 'scalar_coordinate_variable':
        exactly two results, one for HEIGHT and one for DEPTH.
        '''
        dataset = self.load_dataset(STATIC_FILES['scalar_coordinate_variable'])
        results = self.cf.check_scalar_coordinate_system(dataset)
        self.assertEqual(len(results), 2)
        for r in results:
            if r.name[1] == 'HEIGHT':
                self.assertEqual(r.value, (0, 1))
            elif r.name[1] == 'DEPTH':
                self.assertEqual(r.value, (2, 2))
            else:
                self.fail('Unexpected variable in results of check_scalar_coordinate_system')

    def test_check_geographic_region(self):
        '''
        Run check_geographic_region on 'bad_region': the first result fails
        and the second passes (order-dependent).
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_region'])
        results = self.cf.check_geographic_region(dataset)

        self.assertFalse(results[0].value)
        self.assertTrue(results[1].value)

    # def test_check_cell_boundaries(self):
    #     dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
    #     results = self.cf.check_cell_boundaries(dataset)
    #     print results
    #     self.assertTrue(results[0].value)

    def test_check_packed_data(self):
        '''
        Run check_packed_data on 'bad_data_type' and pin the pass/fail
        pattern of the four results (order-dependent).
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_packed_data(dataset)
        self.assertEqual(len(results), 4)
        self.assertFalse(results[0].value)
        self.assertTrue(results[1].value)
        self.assertTrue(results[2].value)
        self.assertFalse(results[3].value)

    def test_check_compression(self):
        '''
        Run check_compression on 'bad_data_type' and pin the first two scores.
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_compression(dataset)
        assert results[0].value == (2, 2)
        assert results[1].value == (0, 2)

    def test_check_all_features_are_same_type(self):
        '''
        check_all_features_are_same_type returns None for 'rutgers', a
        passing result for 'featureType' and a failing one for
        'bad_data_type'.
        '''
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        results = self.cf.check_all_features_are_same_type(dataset)
        assert results is None

        dataset = self.load_dataset(STATIC_FILES['featureType'])
        results = self.cf.check_all_features_are_same_type(dataset)
        self.assertTrue(results.value)

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_all_features_are_same_type(dataset)
        self.assertFalse(results.value)

    def test_check_orthogonal_multidim_array(self):
        '''Every check_orthogonal_multidim_array result for 'rutgers' passes.'''
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        results = self.cf.check_orthogonal_multidim_array(dataset)
        for each in results:
            self.assertTrue(each.value)

    def test_check_incomplete_multidim_array(self):
        '''Every check_incomplete_multidim_array result for 'bad_data_type' passes.'''
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_incomplete_multidim_array(dataset)
        for each in results:
            self.assertTrue(each.value)

    def test_check_contiguous_ragged_array(self):
        '''Every check_contiguous_ragged_array result for 'cont_ragged' passes.'''
        dataset = self.load_dataset(STATIC_FILES['cont_ragged'])
        results = self.cf.check_contiguous_ragged_array(dataset)
        for each in results:
            self.assertTrue(each.value)

    def test_check_indexed_ragged_array(self):
        '''Every check_indexed_ragged_array result for 'index_ragged' passes.'''
        dataset = self.load_dataset(STATIC_FILES['index_ragged'])
        results = self.cf.check_indexed_ragged_array(dataset)
        for each in results:
            self.assertTrue(each.value)

    def test_check_feature_type(self):
        '''
        check_feature_type passes for 'index_ragged' and fails for
        'bad_data_type'.
        '''
        dataset = self.load_dataset(STATIC_FILES['index_ragged'])
        results = self.cf.check_feature_type(dataset)
        self.assertTrue(results.value)

        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_feature_type(dataset)
        self.assertFalse(results.value)

    def test_check_coordinates_and_metadata(self):
        '''
        Run check_coordinates_and_metadata on three fixtures and pin the
        pass/fail pattern of selected results (order-dependent).
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_coordinates_and_metadata(dataset)
        self.assertFalse(results[0].value)
        self.assertTrue(results[1].value)
        self.assertFalse(results[2].value)

        dataset = self.load_dataset(STATIC_FILES['index_ragged'])
        results = self.cf.check_coordinates_and_metadata(dataset)
        self.assertTrue(results[-1].value)

        dataset = self.load_dataset(STATIC_FILES['coordinates_and_metadata'])
        results = self.cf.check_coordinates_and_metadata(dataset)
        self.assertEqual(len(results), 2)
        self.assertFalse(results[0].value)
        self.assertFalse(results[1].value)

    def test_check_missing_data(self):
        '''
        check_missing_data passes every result for 'index_ragged' and fails
        every result for 'bad_missing_data'.
        '''
        dataset = self.load_dataset(STATIC_FILES['index_ragged'])
        results = self.cf.check_missing_data(dataset)
        for each in results:
            self.assertTrue(each.value)

        dataset = self.load_dataset(STATIC_FILES['bad_missing_data'])
        results = self.cf.check_missing_data(dataset)
        for each in results:
            self.assertFalse(each.value)

    def test_check_units(self):
        '''
        Ensure that container variables are not checked for units but
        geophysical variables are
        '''
        dataset = self.load_dataset(STATIC_FILES['units_check'])
        results = self.cf.check_units(dataset)

        # We don't keep track of the variables names for checks that passed, so
        # we can make a strict assertion about how many checks were performed
        # and if there were errors, which there shouldn't be.
        scored, out_of, messages = self.get_results(results)
        assert scored == 4
        assert out_of == 4
        assert messages == []

    def test_64bit(self):
        '''
        Smoke test: running the 'cf' checker through a CheckSuite on the
        'ints64' fixture must not raise. No result is asserted.
        '''
        dataset = self.load_dataset(STATIC_FILES['ints64'])
        suite = CheckSuite()
        suite.checkers = {
            'cf' : CFBaseCheck
        }
        suite.run(dataset, 'cf')

    def test_time_units(self):
        '''
        Run check_units on 'time_units' and pin the score and the unit
        mismatch message.
        '''
        dataset = self.load_dataset(STATIC_FILES['time_units'])
        results = self.cf.check_units(dataset)
        scored, out_of, messages = self.get_results(results)
        assert 'units are days since 1970-01-01, standard_name units should be K' in messages
        assert scored == 1
        assert out_of == 2

    # --------------------------------------------------------------------------------
    # Utility Method Tests
    # --------------------------------------------------------------------------------

    def test_temporal_unit_conversion(self):
        '''
        units_convertible accepts hours -> seconds but rejects converting a
        plain duration to a "since <date>" reference unit.
        '''
        self.assertTrue(units_convertible('hours', 'seconds'))
        self.assertFalse(units_convertible('hours', 'hours since 2000-01-01'))

    def test_units_temporal(self):
        '''
        units_temporal is true only for a "<unit> since <date>" string with a
        parsable date.
        '''
        self.assertTrue(units_temporal('hours since 2000-01-01'))
        self.assertFalse(units_temporal('hours'))
        self.assertFalse(units_temporal('days since the big bang'))
class TestCF(BaseTestCase): def setUp(self): ''' Initialize the dataset ''' self.cf = CFBaseCheck() # -------------------------------------------------------------------------------- # Helper Methods # -------------------------------------------------------------------------------- def new_nc_file(self): ''' Make a new temporary netCDF file for the scope of the test ''' nc_file_path = os.path.join(gettempdir(), 'example.nc') if os.path.exists(nc_file_path): raise IOError('File Exists: %s' % nc_file_path) nc = Dataset(nc_file_path, 'w') self.addCleanup(os.remove, nc_file_path) self.addCleanup(nc.close) return nc def load_dataset(self, nc_dataset): ''' Return a loaded NC Dataset for the given path ''' if not isinstance(nc_dataset, str): raise ValueError("nc_dataset should be a string") nc_dataset = Dataset(nc_dataset, 'r') self.addCleanup(nc_dataset.close) return nc_dataset def get_results(self, results): ''' Returns a tuple of the value scored, possible, and a list of messages in the result set. ''' out_of = 0 scored = 0 for r in results: if isinstance(r.value, tuple): out_of += r.value[1] scored += r.value[0] else: out_of += 1 scored += int(r.value) # Store the messages messages = [] for r in results: messages.extend(r.msgs) return scored, out_of, messages # -------------------------------------------------------------------------------- # Compliance Tests # -------------------------------------------------------------------------------- def test_check_data_types(self): """ Invoke check_data_types() and loop through all variables to check data types. Pertains to 2.2 The netCDF data types char, byte, short, int, float or real, and double are all acceptable. 
""" dataset = self.load_dataset(STATIC_FILES['rutgers']) result = self.cf.check_data_types(dataset) assert result.value[0] == result.value[1] dataset = self.load_dataset(STATIC_FILES['bad_data_type']) result = self.cf.check_data_types(dataset) assert result.msgs[0] == u'The variable temp failed because the datatype is int64' assert result.value == (6, 7) def test_check_child_attr_data_types(self): """ Tests check_child_attr_data_types() to ensure the attributes specified in Section 2.5.1 have a matching data type to their parent variables.""" # create dataset using MockDataset (default constructor gives it time dimension) ds = MockTimeSeries() ds.createVariable("temp", np.float64, dimensions=("time")) # add variable "temp" with dimension "time" # check where no special data attrs are present, should result good result = self.cf.check_child_attr_data_types(ds) # checks all special attrs for all variables self.assert_result_is_good(result) # give temp _FillValue as a float, expect good result ds.variables['temp'].setncattr("_FillValue", np.float(99999999999999999999.)) result = self.cf.check_child_attr_data_types(ds) self.assert_result_is_good(result) # give temp valid_range as an array of floats, all should check out ds.variables['temp'].setncattr("valid_range", np.array([35., 38.])) result = self.cf.check_child_attr_data_types(ds) self.assert_result_is_good(result) # now give invalid integer for valid_min; above two should still check out, this one should fail ds.variables['temp'].setncattr("valid_min", 45) result = self.cf.check_child_attr_data_types(ds) self.assert_result_is_bad(result) # now give invalid string for valid_max ds.variables['temp'].setncattr("valid_max", "eighty") result = self.cf.check_child_attr_data_types(ds) self.assert_result_is_bad(result) # TODO for CF-1.7: actual_range, actual_min/max def test_naming_conventions(self): ''' Section 2.3 Naming Conventions Variable, dimension and attr names should begin with a letter and be composed of 
letters, digits, and underscores. ''' # compliant dataset dataset = self.load_dataset(STATIC_FILES['rutgers']) results = self.cf.check_naming_conventions(dataset) scored, out_of, messages = self.get_results(results) assert scored == out_of # non-compliant dataset dataset = self.load_dataset(STATIC_FILES['bad']) results = self.cf.check_naming_conventions(dataset) scored, out_of, messages = self.get_results(results) assert len(results) == 3 assert scored < out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 2 assert all(r.name == u'§2.3 Naming Conventions' for r in results) # another non-compliant dataset dataset = self.load_dataset(STATIC_FILES['chap2']) results = self.cf.check_naming_conventions(dataset) scored, out_of, messages = self.get_results(results) assert len(results) == 3 assert scored < out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 2 assert all(r.name == u'§2.3 Naming Conventions' for r in results) def test_check_names_unique(self): """ 2.3 names should not be distinguished purely by case, i.e., if case is disregarded, no two names should be the same. """ dataset = self.load_dataset(STATIC_FILES['rutgers']) result = self.cf.check_names_unique(dataset) num_var = len(dataset.variables) expected = (num_var,) * 2 self.assertEqual(result.value, expected) dataset = self.load_dataset(STATIC_FILES['chap2']) result = self.cf.check_names_unique(dataset) assert result.value == (6, 7) assert result.msgs[0] == u'Variables are not case sensitive. Duplicate variables named: not_unique' def test_check_dimension_names(self): """ 2.4 A variable may have any number of dimensions, including zero, and the dimensions must all have different names. 
""" dataset = self.load_dataset(STATIC_FILES['bad_data_type']) result = self.cf.check_dimension_names(dataset) assert result.value == (6, 7) dataset = self.load_dataset(STATIC_FILES['chap2']) result = self.cf.check_dimension_names(dataset) assert result.msgs[0] == u'no_reason has two or more dimensions named time' def test_check_dimension_order(self): """ 2.4 If any or all of the dimensions of a variable have the interpretations of "date or time" (T), "height or depth" (Z), "latitude" (Y), or "longitude" (X) then we recommend, those dimensions to appear in the relative order T, then Z, then Y, then X in the CDL definition corresponding to the file. All other dimensions should, whenever possible, be placed to the left of the spatiotemporal dimensions. """ dataset = self.load_dataset(STATIC_FILES['bad_data_type']) result = self.cf.check_dimension_order(dataset) assert result.value == (5, 6) assert result.msgs[0] == (u"really_bad's dimensions are not in the recommended order " "T, Z, Y, X. They are latitude, power") dataset = self.load_dataset(STATIC_FILES['dimension_order']) result = self.cf.check_dimension_order(dataset) self.assertEqual((3, 3), result.value) self.assertEqual([], result.msgs) def test_check_fill_value_outside_valid_range(self): """ 2.5.1 The _FillValue should be outside the range specified by valid_range (if used) for a variable. 
""" dataset = self.load_dataset(STATIC_FILES['bad_data_type']) result = self.cf.check_fill_value_outside_valid_range(dataset) assert result.msgs[0] == (u'salinity:_FillValue (1.0) should be outside the ' 'range specified by valid_min/valid_max (-10, 10)') dataset = self.load_dataset(STATIC_FILES['chap2']) result = self.cf.check_fill_value_outside_valid_range(dataset) assert result.value == (1, 2) assert result.msgs[0] == (u'wind_speed:_FillValue (12.0) should be outside the ' 'range specified by valid_min/valid_max (0.0, 20.0)') def test_check_conventions_are_cf_16(self): """ §2.6.1 the NUG defined global attribute Conventions to the string value "CF-1.6" """ # :Conventions = "CF-1.6" dataset = self.load_dataset(STATIC_FILES['rutgers']) result = self.cf.check_conventions_are_cf_16(dataset) self.assertTrue(result.value) # :Conventions = "CF-1.6 ,ACDD" ; dataset = self.load_dataset(STATIC_FILES['conv_multi']) result = self.cf.check_conventions_are_cf_16(dataset) self.assertTrue(result.value) # :Conventions = "NoConvention" dataset = self.load_dataset(STATIC_FILES['conv_bad']) result = self.cf.check_conventions_are_cf_16(dataset) self.assertFalse(result.value) assert result.msgs[0] == (u'§2.6.1 Conventions global attribute does not contain ' '"CF-1.6". The CF Checker only supports CF-1.6 ' 'at this time.') def test_check_convention_globals(self): """ Load up a dataset and ensure title and history global attrs are checked properly (§2.6.2). 
""" # check for pass dataset = self.load_dataset(STATIC_FILES['rutgers']) result = self.cf.check_convention_globals(dataset) assert result.value[0] == result.value[1] # check if it doesn't exist that we pass dataset = self.load_dataset(STATIC_FILES['bad_data_type']) result = self.cf.check_convention_globals(dataset) assert result.value[0] != result.value[1] assert result.msgs[0] == u'§2.6.2 global attribute title should exist and be a non-empty string' def test_check_convention_possibly_var_attrs(self): """ §2.6.2 The units attribute is required for all variables that represent dimensional quantities (except for boundary variables defined in Section 7.1, "Cell Boundaries" and climatology variables defined in Section 7.4, "Climatological Statistics"). Units are not required for dimensionless quantities. A variable with no units attribute is assumed to be dimensionless. However, a units attribute specifying a dimensionless unit may optionally be included. - units required - type must be recognized by udunits - if std name specified, must be consistent with standard name table, must also be consistent with a specified cell_methods attribute if present """ dataset = self.load_dataset(STATIC_FILES['rutgers']) result = self.cf.check_convention_possibly_var_attrs(dataset) # 10x comment attrs # 1x institution # 1x source # 1x EMPTY references assert result.value[0] != result.value[1] assert result.msgs[0] == u"§2.6.2 references global attribute should be a non-empty string" # load bad_data_type.nc dataset = self.load_dataset(STATIC_FILES['bad_data_type']) result = self.cf.check_convention_possibly_var_attrs(dataset) # no references # institution is a 10L # no source # comments don't matter unless they're empty assert result.value[0] != result.value[1] assert result.msgs[0] == u'§2.6.2 salinity:institution should be a non-empty string' def test_check_standard_name(self): """ 3.3 A standard name is associated with a variable via the attribute standard_name which takes a 
string value comprised of a standard name optionally followed by one or more blanks and a standard name modifier """ dataset = self.load_dataset(STATIC_FILES['2dim']) results = self.cf.check_standard_name(dataset) for each in results: self.assertTrue(each.value) # load failing ds dataset = self.load_dataset(STATIC_FILES['bad_data_type']) results = self.cf.check_standard_name(dataset) score, out_of, messages = self.get_results(results) # 9 vars checked, 8 fail assert len(results) == 9 assert score < out_of assert all(r.name == u"§3.3 Standard Name" for r in results) #load different ds -- ll vars pass this check dataset = self.load_dataset(STATIC_FILES['reduced_horizontal_grid']) results = self.cf.check_standard_name(dataset) score, out_of, messages = self.get_results(results) assert score == out_of def test_cell_bounds(self): dataset = self.load_dataset(STATIC_FILES['grid-boundaries']) results = self.cf.check_cell_boundaries(dataset) score, out_of, messages = self.get_results(results) assert (score, out_of) == (2, 2) dataset = self.load_dataset(STATIC_FILES['cf_example_cell_measures']) results = self.cf.check_cell_boundaries(dataset) dataset = self.load_dataset(STATIC_FILES['bad_data_type']) results = self.cf.check_cell_boundaries(dataset) dataset = self.load_dataset(STATIC_FILES['bounds_bad_order']) results = self.cf.check_cell_boundaries(dataset) score, out_of, messages = self.get_results(results) # Make sure that the rgrid coordinate variable isn't checked for standard_name assert (score, out_of) == (0, 2) dataset = self.load_dataset(STATIC_FILES['bounds_bad_num_coords']) results = self.cf.check_cell_boundaries(dataset) score, out_of, messages = self.get_results(results) assert (score, out_of) == (0, 2) dataset = self.load_dataset(STATIC_FILES['1d_bound_bad']) results = self.cf.check_cell_boundaries(dataset) score, out_of, messages = self.get_results(results) def test_cell_measures(self): dataset = self.load_dataset(STATIC_FILES['cell_measure']) results = 
self.cf.check_cell_measures(dataset) score, out_of, messages = self.get_results(results) assert score == out_of assert score > 0 dataset = self.load_dataset(STATIC_FILES['bad_cell_measure1']) results = self.cf.check_cell_measures(dataset) score, out_of, messages = self.get_results(results) message = ("The cell_measures attribute for variable PS is formatted incorrectly. " "It should take the form of either 'area: cell_var' or 'volume: cell_var' " "where cell_var is the variable describing the cell measures") assert message in messages dataset = self.load_dataset(STATIC_FILES['bad_cell_measure2']) results = self.cf.check_cell_measures(dataset) score, out_of, messages = self.get_results(results) message = u'Cell measure variable PS referred to by box_area is not present in dataset variables' assert message in messages def test_climatology_cell_methods(self): """ Checks that climatology cell_methods strings are properly validated """ dataset = self.load_dataset(STATIC_FILES['climatology']) results = self.cf.check_climatological_statistics(dataset) # cell methods in this file is # "time: mean within days time: mean over days" score, out_of, messages = self.get_results(results) self.assertEqual(score, out_of) temp_var = dataset.variables['temperature'] = \ MockVariable(dataset.variables['temperature']) temp_var.cell_methods = 'INVALID' results = self.cf.check_climatological_statistics(dataset) score, out_of, messages = self.get_results(results) self.assertNotEqual(score, out_of) # incorrect time units temp_var.cell_methods = "time: mean within years time: mean over days" results = self.cf.check_climatological_statistics(dataset) score, out_of, messages = self.get_results(results) self.assertNotEqual(score, out_of) # can only have third method over years if first two are within and # over days, respectively temp_var.cell_methods = "time: mean within years time: mean over years time: sum over years" results = self.cf.check_climatological_statistics(dataset) score, out_of, 
messages = self.get_results(results) self.assertNotEqual(score, out_of) # this, on the other hand, should work. temp_var.cell_methods = "time: mean within days time: mean over days time: sum over years" results = self.cf.check_climatological_statistics(dataset) score, out_of, messages = self.get_results(results) self.assertEqual(score, out_of) # parenthesized comment to describe climatology temp_var.cell_methods = "time: sum within days time: maximum over days (ENSO years)" results = self.cf.check_climatological_statistics(dataset) score, out_of, messages = self.get_results(results) self.assertEqual(score, out_of) def test_check_ancillary_variables(self): ''' Test to ensure that ancillary variables are properly checked ''' dataset = self.load_dataset(STATIC_FILES['rutgers']) results = self.cf.check_ancillary_variables(dataset) result_dict = {result.name: result for result in results} result = result_dict[u'§3.4 Ancillary Data'] assert result.value == (2, 2) dataset = self.load_dataset(STATIC_FILES['bad_reference']) results = self.cf.check_ancillary_variables(dataset) result_dict = {result.name: result for result in results} result = result_dict[u'§3.4 Ancillary Data'] assert result.value == (1, 2) assert u"temp_qc is not a variable in this dataset" == result.msgs[0] def test_download_standard_name_table(self): """ Test that a user can download a specific standard name table """ version = '35' data_directory = create_cached_data_dir() location = os.path.join(data_directory, 'cf-standard-name-table-test-{0}.xml'.format(version)) download_cf_standard_name_table(version, location) # Test that the file now exists in location and is the right version self.assertTrue(os.path.isfile(location)) std_names = StandardNameTable(location) self.assertEqual(std_names._version, version) self.addCleanup(os.remove, location) def test_bad_standard_name_table(self): """ Test that failure in case a bad standard name table is passed. """ # would this ever actually be reached by the code? 
with pytest.raises(IOError): StandardNameTable('dummy_non_existent_file.ext') nc_obj = MockTimeSeries() nc_obj.standard_name_table = 'dummy_non_existent_file.ext' self.assertFalse(self.cf._find_cf_standard_name_table(nc_obj)) nc_obj.standard_name_table = np.array([], np.float64) self.assertFalse(self.cf._find_cf_standard_name_table(nc_obj)) def test_check_flags(self): """Test that the check for flags works as expected.""" dataset = self.load_dataset(STATIC_FILES['rutgers']) results = self.cf.check_flags(dataset) scored, out_of, messages = self.get_results(results) # only 4 variables in this dataset do not have perfect scores imperfect = [r.value for r in results if r.value[0] < r.value[1]] assert len(imperfect) == 4 def test_check_flag_masks(self): dataset = self.load_dataset(STATIC_FILES['ghrsst']) results = self.cf.check_flags(dataset) scored, out_of, messages = self.get_results(results) # This is an example of a perfect dataset for flags assert scored > 0 assert scored == out_of def test_check_bad_units(self): """Load a dataset with units that are expected to fail (bad_units.nc). 
There are 6 variables in this dataset, three of which should give an error: - time, with units "s" (should be <units> since <epoch>) - lat, with units "degrees_E" (should be degrees) - lev, with units "level" (deprecated)""" dataset = self.load_dataset(STATIC_FILES['2dim']) results = self.cf.check_units(dataset) for result in results: self.assert_result_is_good(result) # Not sure why bad_data_type was being used, we have a dataset specifically for bad units # dataset = self.load_dataset(STATIC_FILES['bad_data_type']) dataset = self.load_dataset(STATIC_FILES['bad_units']) all_results = self.cf.check_units(dataset) # use itertools.chain() to unpack the lists of messages results_list = list(chain(*(r.msgs for r in all_results if r.msgs))) # check the results only have '§3.1 Units' as the header assert all(r.name == u'§3.1 Units' for r in all_results) # check that all the expected variables have been hit assert all(any(s in msg for msg in results_list) for s in ["time", "lat", "lev"]) def test_latitude(self): ''' Section 4.1 Latitude Coordinate ''' # Check compliance dataset = self.load_dataset(STATIC_FILES['example-grid']) results = self.cf.check_latitude(dataset) score, out_of, messages = self.get_results(results) assert score == out_of # Verify non-compliance -- 9/12 pass dataset = self.load_dataset(STATIC_FILES['bad']) results = self.cf.check_latitude(dataset) scored, out_of, messages = self.get_results(results) assert len(results) == 12 assert scored < out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 3 assert (r.name == u'§4.1 Latitude Coordinates' for r in results) # check with another ds -- all 6 vars checked pass dataset = self.load_dataset(STATIC_FILES['rotated_pole_grid']) results = self.cf.check_latitude(dataset) scored, out_of, messages = self.get_results(results) assert len(results) == 6 assert scored == out_of assert (r.name == u'§4.1 Latitude Coordinates' for r in results) # hack to avoid writing to read-only file 
dataset.variables['rlat'] = MockVariable(dataset.variables['rlat']) rlat = dataset.variables['rlat'] rlat.name = 'rlat' # test with a bad value rlat.units = 'degrees_north' results = self.cf.check_latitude(dataset) scored, out_of, messages = self.get_results(results) wrong_format = u"Grid latitude variable '{}' should use degree equivalent units without east or north components. Current units are {}" self.assertTrue(wrong_format.format(rlat.name, rlat.units) in messages) rlat.units = 'radians' results = self.cf.check_latitude(dataset) scored, out_of, messages = self.get_results(results) self.assertTrue(wrong_format.format(rlat.name, rlat.units) in messages) def test_longitude(self): ''' Section 4.2 Longitude Coordinate ''' # Check compliance dataset = self.load_dataset(STATIC_FILES['example-grid']) results = self.cf.check_longitude(dataset) score, out_of, messages = self.get_results(results) assert score == out_of # Verify non-compliance -- 12 checked, 3 fail dataset = self.load_dataset(STATIC_FILES['bad']) results = self.cf.check_longitude(dataset) scored, out_of, messages = self.get_results(results) assert len(results) == 12 assert scored < out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 3 assert all(r.name == u'§4.1 Latitude Coordinates' for r in results) # check different dataset # TODO can be improved for check_latitude too dataset = self.load_dataset(STATIC_FILES['rotated_pole_grid']) results = self.cf.check_latitude(dataset) scored, out_of, messages = self.get_results(results) assert (scored, out_of) == (6, 6) # hack to avoid writing to read-only file dataset.variables['rlon'] = MockVariable(dataset.variables['rlon']) rlon = dataset.variables['rlon'] rlon.name = 'rlon' # test with a bad value rlon.units = 'degrees_east' results = self.cf.check_longitude(dataset) scored, out_of, messages = self.get_results(results) wrong_format = u"Grid longitude variable '{}' should use degree equivalent units without east or north components. 
Current units are {}" self.assertTrue(wrong_format.format(rlon.name, rlon.units) in messages) rlon.units = 'radians' results = self.cf.check_longitude(dataset) scored, out_of, messages = self.get_results(results) self.assertTrue(wrong_format.format(rlon.name, rlon.units) in messages) def test_is_vertical_coordinate(self): ''' Section 4.3 Qualifiers for Vertical Coordinate NOTE: The standard doesn't explicitly say that vertical coordinates must be a coordinate type. ''' # Make something that I can attach attrs to mock_variable = MockVariable # Proper name/standard_name known_name = mock_variable() known_name.standard_name = 'depth' self.assertTrue(is_vertical_coordinate('not_known', known_name)) # Proper Axis axis_set = mock_variable() axis_set.axis = 'Z' self.assertTrue(is_vertical_coordinate('not_known', axis_set)) # Proper units units_set = mock_variable() units_set.units = 'dbar' self.assertTrue(is_vertical_coordinate('not_known', units_set)) # Proper units/positive positive = mock_variable() positive.units = 'm' positive.positive = 'up' self.assertTrue(is_vertical_coordinate('not_known', positive)) def test_vertical_dimension(self): ''' Section 4.3.1 Dimensional Vertical Coordinate ''' # Check for compliance dataset = self.load_dataset(STATIC_FILES['example-grid']) results = self.cf.check_dimensional_vertical_coordinate(dataset) assert len(results) == 1 assert all(r.name == u'§4.3 Vertical Coordinate' for r in results) # non-compliance -- one check fails dataset = self.load_dataset(STATIC_FILES['illegal-vertical']) results = self.cf.check_dimensional_vertical_coordinate(dataset) scored, out_of, messages = self.get_results(results) assert len(results) == 1 assert all(r.name == u'§4.3 Vertical Coordinate' for r in results) assert scored < out_of def test_appendix_d(self): ''' CF 1.6 Appendix D The definitions given here allow an application to compute dimensional coordinate values from the dimensionless ones and associated variables. 
The formulas are expressed for a gridpoint (n,k,j,i) where i and j are the horizontal indices, k is the vertical index and n is the time index. A coordinate variable is associated with its definition by the value of the standard_name attribute. The terms in the definition are associated with file variables by the formula_terms attribute. The formula_terms attribute takes a string value, the string being comprised of blank-separated elements of the form "term: variable", where term is a keyword that represents one of the terms in the definition, and variable is the name of the variable in a netCDF file that contains the values for that term. The order of elements is not significant. ''' # For each of the listed dimensionless vertical coordinates, # verify that the formula_terms match the provided set of terms self.assertTrue(no_missing_terms('atmosphere_ln_pressure_coordinate', {"p0", "lev"})) self.assertTrue(no_missing_terms('atmosphere_sigma_coordinate', {"sigma", "ps", "ptop"})) self.assertTrue(no_missing_terms('atmosphere_hybrid_sigma_pressure_coordinate', {'a', 'b', 'ps'})) # test alternative terms for # 'atmosphere_hybrid_sigma_pressure_coordinate' self.assertTrue(no_missing_terms('atmosphere_hybrid_sigma_pressure_coordinate', {'ap', 'b', 'ps'})) # check that an invalid set of terms fails self.assertFalse(no_missing_terms('atmosphere_hybrid_sigma_pressure_coordinate', {'a', 'b', 'p'})) self.assertTrue(no_missing_terms('atmosphere_hybrid_height_coordinate', {"a", "b", "orog"})) # missing terms should cause failure self.assertFalse(no_missing_terms('atmosphere_hybrid_height_coordinate', {"a", "b"})) # excess terms should cause failure self.assertFalse(no_missing_terms('atmosphere_hybrid_height_coordinate', {"a", "b", "c", "orog"})) self.assertTrue(no_missing_terms('atmosphere_sleve_coordinate', {"a", "b1", "b2", "ztop", "zsurf1", "zsurf2"})) self.assertTrue(no_missing_terms('ocean_sigma_coordinate', {"sigma", "eta", "depth"})) 
self.assertTrue(no_missing_terms('ocean_s_coordinate', {"s", "eta", "depth", "a", "b", "depth_c"})) self.assertTrue(no_missing_terms('ocean_sigma_z_coordinate', {"sigma", "eta", "depth", "depth_c", "nsigma", "zlev"})) self.assertTrue(no_missing_terms('ocean_double_sigma_coordinate', {"sigma", "depth", "z1", "z2", "a", "href", "k_c"})) def test_dimensionless_vertical(self): ''' Section 4.3.2 ''' # Check affirmative compliance dataset = self.load_dataset(STATIC_FILES['dimensionless']) results = self.cf.check_dimensionless_vertical_coordinate(dataset) scored, out_of, messages = self.get_results(results) # all variables checked (2) pass assert len(results) == 2 assert scored == out_of assert all(r.name == u"§4.3 Vertical Coordinate" for r in results) # Check negative compliance -- 3 out of 4 pass dataset = self.load_dataset(STATIC_FILES['bad']) results = self.cf.check_dimensionless_vertical_coordinate(dataset) scored, out_of, messages = self.get_results(results) assert len(results) == 4 assert scored <= out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 2 assert all(r.name == u"§4.3 Vertical Coordinate" for r in results) # test with an invalid formula_terms dataset.variables['lev2'] = MockVariable(dataset.variables['lev2']) lev2 = dataset.variables['lev2'] lev2.formula_terms = 'a: var1 b:var2 orog:' # create a malformed formula_terms attribute and check that it fails # 2/4 still pass results = self.cf.check_dimensionless_vertical_coordinate(dataset) scored, out_of, messages = self.get_results(results) assert len(results) == 4 assert scored <= out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 2 assert all(r.name == u"§4.3 Vertical Coordinate" for r in results) def test_is_time_variable(self): var1 = MockVariable() var1.standard_name = 'time' self.assertTrue(is_time_variable('not_time', var1)) var2 = MockVariable() self.assertTrue(is_time_variable('time', var2)) self.assertFalse(is_time_variable('not_time', var2)) var3 = 
MockVariable() var3.axis = 'T' self.assertTrue(is_time_variable('maybe_time', var3)) var4 = MockVariable() var4.units = 'seconds since 1900-01-01' self.assertTrue(is_time_variable('maybe_time', var4)) def test_dimensionless_standard_names(self): """Check that dimensionless standard names are properly detected""" std_names_xml_root = self.cf._std_names._root # canonical_units are K, should be False self.assertFalse(cfutil.is_dimensionless_standard_name(std_names_xml_root, 'sea_water_temperature')) # canonical_units are 1, should be True self.assertTrue(cfutil.is_dimensionless_standard_name(std_names_xml_root, 'sea_water_practical_salinity')) # canonical_units are 1e-3, should be True self.assertTrue(cfutil.is_dimensionless_standard_name(std_names_xml_root, 'sea_water_salinity')) def test_check_time_coordinate(self): dataset = self.load_dataset(STATIC_FILES['example-grid']) results = self.cf.check_time_coordinate(dataset) for r in results: self.assertTrue(r.value) dataset = self.load_dataset(STATIC_FILES['bad']) results = self.cf.check_time_coordinate(dataset) scored, out_of, messages = self.get_results(results) assert u'time does not have correct time units' in messages assert (scored, out_of) == (1, 2) def test_check_calendar(self): """Load a dataset with an invalid calendar attribute (non-comp/bad.nc). 
This dataset has a variable, "time" with calendar attribute "nope".""" dataset = self.load_dataset(STATIC_FILES['example-grid']) results = self.cf.check_calendar(dataset) for r in results: self.assertTrue(r.value) dataset = self.load_dataset(STATIC_FILES['bad']) results = self.cf.check_calendar(dataset) scored, out_of, messages = self.get_results(results) assert u"§4.4.1 Variable time should have a valid calendar: 'nope' is not a valid calendar" in messages def test_check_aux_coordinates(self): dataset = self.load_dataset(STATIC_FILES['illegal-aux-coords']) results = self.cf.check_aux_coordinates(dataset) result_dict = {result.name: result for result in results} result = result_dict[u"§5 Coordinate Systems"] assert result.msgs == [] # shouldn't have any messages assert result.value == (4, 4) def test_check_grid_coordinates(self): dataset = self.load_dataset(STATIC_FILES['2dim']) results = self.cf.check_grid_coordinates(dataset) scored, out_of, messages = self.get_results(results) result_dict = {result.name: result for result in results} result = result_dict[u'§5.6 Horizontal Coorindate Reference Systems, Grid Mappings, Projections'] assert result.value == (2, 2) assert (scored, out_of) == (2, 2) def test_check_two_dimensional(self): dataset = self.load_dataset(STATIC_FILES['2dim']) results = self.cf.check_grid_coordinates(dataset) for r in results: self.assertTrue(r.value) # Need the bad testing dataset = self.load_dataset(STATIC_FILES['bad2dim']) results = self.cf.check_grid_coordinates(dataset) scored, out_of, messages = self.get_results(results) # all variables checked fail (2) assert len(results) == 2 assert scored < out_of assert all(r.name == u'§5.6 Horizontal Coorindate Reference Systems, Grid Mappings, Projections' for r in results) def test_check_reduced_horizontal_grid(self): dataset = self.load_dataset(STATIC_FILES['rhgrid']) results = self.cf.check_reduced_horizontal_grid(dataset) scored, out_of, messages = self.get_results(results) assert scored == 
out_of assert len(results) == 1 assert all(r.name == u'§5.3 Reduced Horizontal Grid' for r in results) # load failing ds -- one variable has failing check dataset = self.load_dataset(STATIC_FILES['bad-rhgrid']) results = self.cf.check_reduced_horizontal_grid(dataset) scored, out_of, messages = self.get_results(results) assert scored != out_of assert len(results) == 2 assert len([r for r in results if r.value[0] < r.value[1]]) == 1 assert all(r.name == u'§5.3 Reduced Horizontal Grid' for r in results) def test_check_grid_mapping(self): dataset = self.load_dataset(STATIC_FILES['mapping']) results = self.cf.check_grid_mapping(dataset) # there are 8 results, 2 of which did not have perfect scores assert len(results) == 8 assert len([r.value for r in results if r.value[0] < r.value[1]]) == 2 assert all(r.name == u'§5.6 Horizontal Coorindate Reference Systems, Grid Mappings, Projections' for r in results) def test_check_geographic_region(self): dataset = self.load_dataset(STATIC_FILES['bad_region']) results = self.cf.check_geographic_region(dataset) scored, out_of, messages = self.get_results(results) # only one variable failed this check in this ds out of 2 assert len(results) == 2 assert scored < out_of assert u"6.1.1 'Neverland' specified by 'neverland' is not a valid region" in messages def test_check_packed_data(self): dataset = self.load_dataset(STATIC_FILES['bad_data_type']) results = self.cf.check_packed_data(dataset) self.assertEqual(len(results), 4) self.assertFalse(results[0].value) self.assertFalse(results[1].value) self.assertTrue(results[2].value) self.assertFalse(results[3].value) def test_compress_packed(self): """Tests compressed indexed coordinates""" dataset = self.load_dataset(STATIC_FILES['reduced_horizontal_grid']) results = self.cf.check_compression_gathering(dataset) self.assertTrue(results[0].value) dataset = self.load_dataset(STATIC_FILES['bad_data_type']) results = self.cf.check_compression_gathering(dataset) self.assertFalse(results[0].value) 
self.assertFalse(results[1].value) def test_check_all_features_are_same_type(self): dataset = self.load_dataset(STATIC_FILES['rutgers']) result = self.cf.check_all_features_are_same_type(dataset) assert result dataset = self.load_dataset(STATIC_FILES['featureType']) result = self.cf.check_all_features_are_same_type(dataset) assert result def test_featureType_is_case_insensitive(self): ''' Tests that the featureType attribute is case insensitive ''' nc = self.new_nc_file() nc.featureType = 'timeseriesprofile' result = self.cf.check_feature_type(nc) self.assertTrue(result.value == (1, 1)) nc.featureType = 'timeSeriesProfile' result = self.cf.check_feature_type(nc) self.assertTrue(result.value == (1, 1)) nc.featureType = 'traJectorYpRofile' result = self.cf.check_feature_type(nc) self.assertTrue(result.value == (1, 1)) # This one should fail nc.featureType = 'timeseriesprofilebad' result = self.cf.check_feature_type(nc) self.assertTrue(result.value == (0, 1)) def test_check_units(self): ''' Ensure that container variables are not checked for units but geophysical variables are ''' dataset = self.load_dataset(STATIC_FILES['units_check']) results = self.cf.check_units(dataset) # We don't keep track of the variables names for checks that passed, so # we can make a strict assertion about how many checks were performed # and if there were errors, which there shouldn't be. # FIXME (badams): find a better way of grouping together results by # variable checked instead of checking the number of # points scored, which should be deprecated, and # furthermore is fragile and breaks tests when check # definitions change scored, out_of, messages = self.get_results(results) assert scored == 24 assert out_of == 24 assert messages == [] def test_check_duplicates(self): ''' Test to verify that the check identifies duplicate axes. Load the duplicate_axis.nc dataset and verify the duplicate axes are accounted for. 
''' dataset = self.load_dataset(STATIC_FILES['duplicate_axis']) results = self.cf.check_duplicate_axis(dataset) scored, out_of, messages = self.get_results(results) # only one check run here, so we can directly compare all the values assert scored != out_of assert messages[0] == u"'temp' has duplicate axis X defined by [lon_rho, lon_u]" def test_check_multi_dimensional_coords(self): ''' Test to verify that multi dimensional coordinates are checked for sharing names with dimensions ''' dataset = self.load_dataset(STATIC_FILES['multi-dim-coordinates']) results = self.cf.check_multi_dimensional_coords(dataset) scored, out_of, messages = self.get_results(results) # 4 variables were checked in this ds, 2 of which passed assert len(results) == 4 assert len([r for r in results if r.value[0] < r.value[1]]) == 2 assert all(r.name == u"§5 Coordinate Systems" for r in results) def test_64bit(self): dataset = self.load_dataset(STATIC_FILES['ints64']) suite = CheckSuite() suite.checkers = { 'cf' : CFBaseCheck } suite.run(dataset, 'cf') def test_variable_feature_check(self): # non-compliant dataset -- 1/1 fail dataset = self.load_dataset(STATIC_FILES['bad-trajectory']) results = self.cf.check_variable_features(dataset) scored, out_of, messages = self.get_results(results) assert len(results) == 1 assert scored < out_of assert len([r for r in results if r.value[0] < r.value[1]]) == 1 assert all(r.name == u'§9.1 Features and feature types' for r in results) # compliant dataset dataset = self.load_dataset(STATIC_FILES['trajectory-complete']) results = self.cf.check_variable_features(dataset) scored, out_of, messages = self.get_results(results) assert scored == out_of # compliant(?) dataset dataset = self.load_dataset(STATIC_FILES['trajectory-implied']) results = self.cf.check_variable_features(dataset) scored, out_of, messages = self.get_results(results) assert scored == out_of def test_check_cell_methods(self): """Load a dataset (climatology.nc) and check the cell methods. 
This dataset has variable "temperature" which has valid cell_methods format, cell_methods attribute, and valid names within the cell_methods attribute.""" dataset = self.load_dataset(STATIC_FILES['climatology']) results = self.cf.check_cell_methods(dataset) scored, out_of, messages = self.get_results(results) # use itertools.chain() to unpack the lists of messages results_list = list(chain(*(r.msgs for r in results if r.msgs))) # check the results only have expected headers assert set([r.name for r in results]).issubset(set([u'§7.1 Cell Boundaries', u'§7.3 Cell Methods'])) # check that all the expected variables have been hit assert all("temperature" in msg for msg in results_list) # check that all the results have come back passing assert all(r.value[0] == r.value[1] for r in results) # create a temporary variable and test this only nc_obj = MockTimeSeries() nc_obj.createVariable('temperature', 'd', ('time',)) temp = nc_obj.variables['temperature'] temp.cell_methods = 'lat: lon: mean depth: mean (interval: 20 meters)' results = self.cf.check_cell_methods(nc_obj) # invalid components lat, lon, and depth -- expect score == (6, 9) scored, out_of, messages = self.get_results(results) assert scored != out_of temp.cell_methods = 'lat: lon: mean depth: mean (interval: x whizbangs)' results = self.cf.check_cell_methods(nc_obj) scored, out_of, messages = self.get_results(results) # check non-standard comments are gauged correctly temp.cell_methods = 'lat: lon: mean depth: mean (comment: should not go here interval: 2.5 m)' results = self.cf.check_cell_methods(nc_obj) scored, out_of, messages = self.get_results(results) self.assertTrue(u'§7.3.3 The non-standard "comment:" element must come after any standard elements in cell_methods for variable temperature' in messages) # standalone comments require no keyword temp.cell_methods = 'lon: mean (This is a standalone comment)' results = self.cf.check_cell_methods(nc_obj) scored, out_of, messages = self.get_results(results) 
assert "standalone" not in messages # check that invalid keywords dealt with temp.cell_methods = 'lat: lon: mean depth: mean (invalid_keyword: this is invalid)' results = self.cf.check_cell_methods(nc_obj) scored, out_of, messages = self.get_results(results) self.assertTrue(u'§7.3.3 Invalid cell_methods keyword "invalid_keyword:" for variable temperature. Must be one of [interval, comment]' in messages) # check that "parenthetical elements" are well-formed (they should not be) temp.cell_methods = 'lat: lon: mean depth: mean (interval 0.2 m interval: 0.01 degrees)' results = self.cf.check_cell_methods(nc_obj) scored, out_of, messages = self.get_results(results) assert u'§7.3.3 Parenthetical content inside temperature:cell_methods is not well formed: interval 0.2 m interval: 0.01 degrees' in messages # -------------------------------------------------------------------------------- # Utility Method Tests # -------------------------------------------------------------------------------- def test_temporal_unit_conversion(self): self.assertTrue(units_convertible('hours', 'seconds')) self.assertFalse(units_convertible('hours', 'hours since 2000-01-01')) def test_units_temporal(self): self.assertTrue(units_temporal('hours since 2000-01-01')) self.assertFalse(units_temporal('hours')) self.assertFalse(units_temporal('days since the big bang'))
class TestCF(unittest.TestCase): # @see # http://www.saltycrane.com/blog/2012/07/how-prevent-nose-unittest-using-docstring-when-verbosity-2/ def shortDescription(self): return None # override __str__ and __repr__ behavior to show a copy-pastable nosetest name for ion tests # ion.module:TestClassName.test_function_name def __repr__(self): name = self.id() name = name.split('.') if name[0] not in ["ion", "pyon"]: return "%s (%s)" % (name[-1], '.'.join(name[:-1])) else: return "%s ( %s )" % (name[-1], '.'.join(name[:-2]) + ":" + '.'.join(name[-2:])) __str__ = __repr__ def setUp(self): ''' Initialize the dataset ''' self.cf = CFBaseCheck() #-------------------------------------------------------------------------------- # Helper Methods #-------------------------------------------------------------------------------- def new_nc_file(self): ''' Make a new temporary netCDF file for the scope of the test ''' nc_file_path = os.path.join(gettempdir(), 'example.nc') if os.path.exists(nc_file_path): raise IOError('File Exists: %s' % nc_file_path) nc = Dataset(nc_file_path, 'w') self.addCleanup(os.remove, nc_file_path) self.addCleanup(nc.close) return nc def get_pair(self, nc_dataset): ''' Return a pairwise object for the dataset ''' if isinstance(nc_dataset, basestring): nc_dataset = Dataset(nc_dataset, 'r') self.addCleanup(nc_dataset.close) dogma = NetCDFDogma('nc', self.cf.beliefs(), nc_dataset) pair = DSPair(nc_dataset, dogma) return pair def get_results(self, results): ''' Returns a tuple of the value scored, possible, and a list of messages in the result set. 
''' out_of = 0 scored = 0 for r in results: if isinstance(r.value, tuple): out_of += r.value[1] scored += r.value[0] else: out_of += 1 scored += int(r.value) # Store the messages messages = [] for r in results: messages.extend(r.msgs) return scored, out_of, messages #-------------------------------------------------------------------------------- # Compliance Tests #-------------------------------------------------------------------------------- def test_check_data_types(self): """ 2.2 The netCDF data types char, byte, short, int, float or real, and double are all acceptable """ dataset = self.get_pair(static_files['rutgers']) result = self.cf.check_data_types(dataset) self.assertTrue(result.value) dpair = self.get_pair(static_files['bad_data_type']) result = self.cf.check_data_types(dpair) assert result.value == (5, 6) def test_naming_conventions(self): ''' Section 2.3 Naming Conventions Variable, dimension and attribute names should begin with a letter and be composed of letters, digits, and underscores. ''' dataset = self.get_pair(static_files['rutgers']) result = self.cf.check_naming_conventions(dataset) num_var = len(dataset.dataset.variables) expected = (num_var, ) * 2 self.assertEquals(result.value, expected) dataset = self.get_pair(static_files['bad']) result = self.cf.check_naming_conventions(dataset) num_var = len(dataset.dataset.variables) expected = (num_var - 1, num_var) self.assertEquals(result.value, expected) assert '_poor_dim' in result.msgs[0] def test_check_names_unique(self): """ 2.3 names should not be distinguished purely by case, i.e., if case is disregarded, no two names should be the same. 
""" dataset = self.get_pair(static_files['rutgers']) result = self.cf.check_names_unique(dataset) num_var = len(dataset.dataset.variables) expected = (num_var, ) * 2 self.assertEquals(result.value, expected) #TODO: Add bad unique names to bad.nc def test_check_dimension_names(self): """ 2.4 A variable may have any number of dimensions, including zero, and the dimensions must all have different names. """ dataset = self.get_pair(static_files['bad_data_type']) result = self.cf.check_dimension_names(dataset) assert result.value == (5, 6) def test_check_dimension_order(self): """ 2.4 If any or all of the dimensions of a variable have the interpretations of "date or time" (T), "height or depth" (Z), "latitude" (Y), or "longitude" (X) then we recommend, those dimensions to appear in the relative order T, then Z, then Y, then X in the CDL definition corresponding to the file. All other dimensions should, whenever possible, be placed to the left of the spatiotemporal dimensions. """ dataset = self.get_pair(static_files['bad_data_type']) result = self.cf.check_dimension_order(dataset) assert result.value == (11, 12) def test_check_fill_value_outside_valid_range(self): """ 2.5.1 The _FillValue should be outside the range specified by valid_range (if used) for a variable. 
""" dataset = self.get_pair(static_files['bad_data_type']) results = self.cf.check_fill_value_outside_valid_range(dataset) assert sum((result.value for result in results)) == 1 assert len(results) == 2 def test_check_conventions_are_cf_16(self): """ 2.6.1 the NUG defined global attribute Conventions to the string value "CF-1.6" """ # :Conventions = "CF-1.6" dataset = self.get_pair(static_files['rutgers']) result = self.cf.check_conventions_are_cf_16(dataset) self.assertTrue(result.value) # :Conventions = "CF-1.6 ,ACDD" ; dataset = self.get_pair(static_files['conv_multi']) result = self.cf.check_conventions_are_cf_16(dataset) self.assertTrue(result.value) # :Conventions = "NoConvention" dataset = self.get_pair(static_files['conv_bad']) result = self.cf.check_conventions_are_cf_16(dataset) self.assertFalse(result.value) def test_check_convention_globals(self): """ 2.6.2 title/history global attributes, must be strings. Do not need to exist. """ #check for pass dataset = self.get_pair(static_files['rutgers']) result = self.cf.check_convention_globals(dataset) for each in result: self.assertTrue(each.value) #check if it doesn't exist that we pass dataset = self.get_pair(static_files['bad_data_type']) result = self.cf.check_convention_globals(dataset) for each in result: self.assertTrue(each.value) def test_check_convention_possibly_var_attrs(self): """ 3.1 The units attribute is required for all variables that represent dimensional quantities (except for boundary variables defined in Section 7.1, "Cell Boundaries" and climatology variables defined in Section 7.4, "Climatological Statistics"). Units are not required for dimensionless quantities. A variable with no units attribute is assumed to be dimensionless. However, a units attribute specifying a dimensionless unit may optionally be included. 
- units required - type must be recognized by udunits - if std name specified, must be consistent with standard name table, must also be consistent with a specified cell_methods attribute if present """ dataset = self.get_pair(static_files['rutgers']) result = self.cf.check_convention_possibly_var_attrs(dataset) for each in result: self.assertTrue(each.value) dataset = self.get_pair(static_files['bad_data_type']) result = self.cf.check_convention_possibly_var_attrs(dataset) for each in result: self.assertFalse(each.value) def test_check_standard_name(self): """ 3.3 A standard name is associated with a variable via the attribute standard_name which takes a string value comprised of a standard name optionally followed by one or more blanks and a standard name modifier """ dataset = self.get_pair(static_files['2dim']) result = self.cf.check_standard_name(dataset) for each in result: self.assertTrue(each.value) dataset = self.get_pair(static_files['bad_data_type']) result = self.cf.check_standard_name(dataset) for each in result: self.assertFalse(each.value) def test_check_flags(self): dataset = self.get_pair(static_files['self_referencing']) results = self.cf.check_flags(dataset) scored, out_of, messages = self.get_results(results) self.assertEqual(scored, 46) self.assertEqual(out_of, 59) self.assertEqual(messages.count(u'flag_values must be a list'), 6) self.assertEqual( messages.count( u'flag_values attr does not have same type as var (fv: int8, v: int16)' ), 6) self.assertEqual( messages.count( u'flag_values attr does not have same type as var (fv: <U1, v: int16)' ), 1) def test_check_units(self): dataset = self.get_pair(static_files['2dim']) result = self.cf.check_units(dataset) for each in result: self.assertTrue(each.value) dataset = self.get_pair(static_files['bad_data_type']) result = self.cf.check_units(dataset) for each in result: self.assertFalse(each.value) def test_coordinate_types(self): ''' Section 4 Coordinate Types We strongly recommend that 
coordinate variables be used for all coordinate types whenever they are applicable. ''' dataset = self.get_pair(static_files['bad_data_type']) result = self.cf.check_coordinate_vars_for_all_coordinate_types( dataset) for each in result: self.assertTrue(each.value) def test_check_coordinate_axis_attr(self): dataset = self.get_pair(static_files['2dim']) result = self.cf.check_coordinate_axis_attr(dataset) for each in result: self.assertTrue(each.value) dataset = self.get_pair(static_files['bad_data_type']) result = self.cf.check_coordinate_axis_attr(dataset) for each in result: if each.name[1] in ['time', 'latitude']: self.assertTrue(each.value) if each.name[1] in ['salinity']: if each.name[2] not in ['does_not_depend_on_mult_coord_vars']: self.assertFalse(each.value) def test_latitude(self): ''' Section 4.1 Latitude Coordinate ''' # Check compliance dataset = self.get_pair(static_files['example-grid']) results = self.cf.check_latitude(dataset) for r in results: if isinstance(r.value, tuple): self.assertEquals(r.value[0], r.value[1]) else: self.assertTrue(r.value) # Verify non-compliance dataset = self.get_pair(static_files['bad']) results = self.cf.check_latitude(dataset) scored, out_of, messages = self.get_results(results) assert 'lat does not have units attribute' in messages assert 'lat_uv units are acceptable, but not recommended' in messages assert 'lat_like does not have units attribute' in messages assert scored == 5 assert out_of == 12 def test_longitude(self): ''' Section 4.2 Longitude Coordinate ''' # Check compliance dataset = self.get_pair(static_files['example-grid']) results = self.cf.check_longitude(dataset) for r in results: if isinstance(r.value, tuple): self.assertEquals(r.value[0], r.value[1]) else: self.assertTrue(r.value) # Verify non-compliance dataset = self.get_pair(static_files['bad']) results = self.cf.check_longitude(dataset) scored, out_of, messages = self.get_results(results) assert 'lon does not have units attribute' in messages assert 
'lon_uv units are acceptable, but not recommended' in messages assert 'lon_like does not have units attribute' in messages assert scored == 5 assert out_of == 12 def test_is_vertical_coordinate(self): ''' Section 4.3 Qualifiers for Vertical Coordinate NOTE: The standard doesn't explicitly say that vertical coordinates must be a coordinate type. ''' # Make something that I can attach attrs to mock_variable = MockVariable # Proper name/standard_name known_name = mock_variable() known_name.standard_name = 'depth' self.assertTrue(is_vertical_coordinate('not_known', known_name)) # Proper Axis axis_set = mock_variable() axis_set.axis = 'Z' self.assertTrue(is_vertical_coordinate('not_known', axis_set)) # Proper units units_set = mock_variable() units_set.units = 'dbar' self.assertTrue(is_vertical_coordinate('not_known', units_set)) # Proper units/positive positive = mock_variable() positive.units = 'm' positive.positive = 'up' self.assertTrue(is_vertical_coordinate('not_known', positive)) def test_vertical_coordinate(self): ''' Section 4.3 Vertical (Height or Depth) coordinate ''' # Check compliance dataset = self.get_pair(static_files['example-grid']) results = self.cf.check_vertical_coordinate(dataset) for r in results: self.assertTrue(r.value) # Check non-compliance dataset = self.get_pair(static_files['bad']) results = self.cf.check_vertical_coordinate(dataset) scored, out_of, messages = self.get_results(results) assert 'height does not have units' in messages assert 'vertical variable depth needs to define positive attribute' assert 'vertical variable depth2 needs to define positive attribute' def test_vertical_dimension(self): ''' Section 4.3.1 Dimensional Vertical Coordinate ''' # Check for compliance dataset = self.get_pair(static_files['example-grid']) results = self.cf.check_dimensional_vertical_coordinate(dataset) for r in results: self.assertTrue(r.value) # Check for non-compliance dataset = self.get_pair(static_files['bad']) results = 
self.cf.check_dimensional_vertical_coordinate(dataset) for r in results: self.assertFalse(r.value) def test_appendix_d(self): ''' CF 1.6 Appendix D The definitions given here allow an application to compute dimensional coordinate values from the dimensionless ones and associated variables. The formulas are expressed for a gridpoint (n,k,j,i) where i and j are the horizontal indices, k is the vertical index and n is the time index. A coordinate variable is associated with its definition by the value of the standard_name attribute. The terms in the definition are associated with file variables by the formula_terms attribute. The formula_terms attribute takes a string value, the string being comprised of blank-separated elements of the form "term: variable", where term is a keyword that represents one of the terms in the definition, and variable is the name of the variable in a netCDF file that contains the values for that term. The order of elements is not significant. ''' dimless = dict(dimless_vertical_coordinates) def verify(std_name, test_str): regex_matches = re.match(dimless[std_name], test_str) self.assertIsNotNone(regex_matches) # For each of the listed dimensionless vertical coordinates, # verify that the formula_terms match the provided regex verify('atmosphere_ln_pressure_coordinate', "p0: var1 lev: var2") verify('atmosphere_sigma_coordinate', "sigma: var1 ps: var2 ptop: var3") verify('atmosphere_hybrid_sigma_pressure_coordinate', "a: var1 b: var2 ps: var3 p0: var4") verify('atmosphere_hybrid_height_coordinate', "a: var1 b: var2 orog: var3") verify( 'atmosphere_sleve_coordinate', "a: var1 b1: var2 b2: var3 ztop: var4 zsurf1: var5 zsurf2: var6") verify('ocean_sigma_coordinate', "sigma: var1 eta: var2 depth: var3") verify('ocean_s_coordinate', "s: var1 eta: var2 depth: var3 a: var4 b: var5 depth_c: var6") verify( 'ocean_sigma_z_coordinate', "sigma: var1 eta: var2 depth: var3 depth_c: var4 nsigma: var5 zlev: var6" ) verify( 'ocean_double_sigma_coordinate', 
"sigma: var1 depth: var2 z1: var3 z2: var4 a: var5 href: var6 k_c: var7" ) def test_dimensionless_vertical(self): ''' Section 4.3.2 ''' # Check affirmative compliance dataset = self.get_pair(static_files['dimensionless']) results = self.cf.check_dimensionless_vertical_coordinate(dataset) for r in results: self.assertTrue(r.value) # Check negative compliance dataset = self.get_pair(static_files['bad']) results = self.cf.check_dimensionless_vertical_coordinate(dataset) scored, out_of, messages = self.get_results(results) assert u'formula_terms missing from dimensionless coordinate lev1' in messages assert u'formula_terms not defined for dimensionless coordinate lev1' in messages assert u'var1 missing for dimensionless coordinate lev2' in messages assert u'var2 missing for dimensionless coordinate lev2' in messages assert u'var3 missing for dimensionless coordinate lev2' in messages assert scored == 1 assert out_of == 4 def test_is_time_variable(self): var1 = MockVariable() var1.standard_name = 'time' self.assertTrue(is_time_variable('not_time', var1)) var2 = MockVariable() self.assertTrue(is_time_variable('time', var2)) self.assertFalse(is_time_variable('not_time', var2)) var3 = MockVariable() var3.axis = 'T' self.assertTrue(is_time_variable('maybe_time', var3)) var4 = MockVariable() var4.units = 'seconds since 1900-01-01' self.assertTrue(is_time_variable('maybe_time', var4)) def test_check_time_coordinate(self): dataset = self.get_pair(static_files['example-grid']) results = self.cf.check_time_coordinate(dataset) for r in results: self.assertTrue(r.value) dataset = self.get_pair(static_files['bad']) results = self.cf.check_time_coordinate(dataset) scored, out_of, messages = self.get_results(results) assert u'bad_time_1 does not have units' in messages assert u'bad_time_2 doesn not have correct time units' in messages assert scored == 1 assert out_of == 3 def test_check_calendar(self): dataset = self.get_pair(static_files['example-grid']) results = 
self.cf.check_calendar(dataset) for r in results: self.assertTrue(r.value) dataset = self.get_pair(static_files['bad']) results = self.cf.check_calendar(dataset) scored, out_of, messages = self.get_results(results) assert u'Variable bad_time_1 should have a calendar attribute' in messages assert u"Variable bad_time_2 should have a valid calendar: 'nope' is not a valid calendar" in messages def test_self_referencing(self): ''' This test captures a check where a coordinate has circular references ''' dataset = self.get_pair(static_files['self_referencing']) results = self.cf.check_two_dimensional(dataset) scored, out_of, messages = self.get_results(results) assert u"Variable TEMP_H's coordinate references itself" in messages assert scored == 0 assert out_of == 44 def test_check_independent_axis_dimensions(self): dataset = self.get_pair(static_files['example-grid']) results = self.cf.check_independent_axis_dimensions(dataset) for r in results: self.assertTrue(r.value) dataset = self.get_pair(static_files['bad']) results = self.cf.check_independent_axis_dimensions(dataset) scored, out_of, messages = self.get_results(results) assert u'The lev dimension for the variable lev1 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \ in messages assert u'The lev dimension for the variable lev2 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \ in messages assert u'The time dimension for the variable bad_time_1 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \ in messages assert u'The time dimension for the variable bad_time_2 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \ in messages assert u'The time dimension for the variable column_temp does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' 
\ in messages assert scored == 6 assert out_of == 11 def test_check_two_dimensional(self): dataset = self.get_pair(static_files['2dim']) results = self.cf.check_two_dimensional(dataset) for r in results: self.assertTrue(r.value) # Need the bad testing dataset = self.get_pair(static_files['bad2dim']) results = self.cf.check_two_dimensional(dataset) self.assertTrue(results[0].value) self.assertFalse(results[1].value) self.assertFalse(results[2].value) self.assertTrue(results[3].value) self.assertFalse(results[4].value) self.assertTrue(results[5].value) # Test the self referencing variables dataset = self.get_pair(static_files['self-referencing-var']) try: results = self.cf.check_two_dimensional(dataset) self.assertFalse(results[0].value) except: self.assertTrue(False) def test_check_reduced_horizontal_grid(self): dataset = self.get_pair(static_files['rhgrid']) results = self.cf.check_reduced_horizontal_grid(dataset) rd = {r.name[1]: r.value for r in results} self.assertTrue(rd['PS']) dataset = self.get_pair(static_files['bad-rhgrid']) results = self.cf.check_reduced_horizontal_grid(dataset) rd = {r.name[1]: (r.value, r.msgs) for r in results} for name, (value, msg) in rd.iteritems(): self.assertFalse(value) self.assertIn('Coordinate longitude is not a proper variable', rd['PSa'][1]) self.assertIn( "Coordinate latitude's dimension, latdim, is not a dimension of PSb", rd['PSb'][1]) assert 'PSc' not in rd.keys() def test_check_horz_crs_grid_mappings_projections(self): dataset = self.get_pair(static_files['mapping']) results = self.cf.check_horz_crs_grid_mappings_projections(dataset) rd = {r.name[1]: r.value for r in results} assert rd['wgs84'] == (3, 3) assert rd['epsg'] == (7, 8) def test_check_scalar_coordinate_system(self): dataset = self.get_pair(static_files['scalar_coordinate_variable']) results = self.cf.check_scalar_coordinate_system(dataset) self.assertEqual(len(results), 2) for r in results: if r.name[1] == 'HEIGHT': self.assertEqual(r.value, (0, 1)) elif 
r.name[1] == 'DEPTH': self.assertEqual(r.value, (2, 2)) else: self.assertTrue( False, 'Unexpected variable in results of check_scalar_coordinate_system' ) def test_check_geographic_region(self): dataset = self.get_pair(static_files['bad_region']) results = self.cf.check_geographic_region(dataset) self.assertFalse(results[0].value) self.assertTrue(results[1].value) def test_check_alternative_coordinates(self): dataset = self.get_pair(static_files['bad_data_type']) results = self.cf.check_alternative_coordinates(dataset) self.assertTrue(results[0].value) #def test_check_cell_boundaries(self): # dataset = self.get_pair(static_files['bad_data_type']) # results = self.cf.check_cell_boundaries(dataset) # print results # self.assertTrue(results[0].value) def test_check_packed_data(self): dataset = self.get_pair(static_files['bad_data_type']) results = self.cf.check_packed_data(dataset) self.assertEqual(len(results), 4) self.assertFalse(results[0].value) self.assertTrue(results[1].value) self.assertTrue(results[2].value) self.assertFalse(results[3].value) def test_check_compression(self): dataset = self.get_pair(static_files['bad_data_type']) results = self.cf.check_compression(dataset) assert results[0].value == (2, 2) assert results[1].value == (0, 2) def test_check_all_features_are_same_type(self): dataset = self.get_pair(static_files['rutgers']) results = self.cf.check_all_features_are_same_type(dataset) assert results == None dataset = self.get_pair(static_files['featureType']) results = self.cf.check_all_features_are_same_type(dataset) self.assertTrue(results.value) dataset = self.get_pair(static_files['bad_data_type']) results = self.cf.check_all_features_are_same_type(dataset) self.assertFalse(results.value) def test_check_orthogonal_multidim_array(self): dataset = self.get_pair(static_files['rutgers']) results = self.cf.check_orthogonal_multidim_array(dataset) for each in results: self.assertTrue(each.value) def test_check_incomplete_multidim_array(self): dataset 
= self.get_pair(static_files['bad_data_type']) results = self.cf.check_incomplete_multidim_array(dataset) for each in results: self.assertTrue(each.value) def test_check_contiguous_ragged_array(self): dataset = self.get_pair(static_files['cont_ragged']) results = self.cf.check_contiguous_ragged_array(dataset) for each in results: self.assertTrue(each.value) def test_check_indexed_ragged_array(self): dataset = self.get_pair(static_files['index_ragged']) results = self.cf.check_indexed_ragged_array(dataset) for each in results: self.assertTrue(each.value) def test_check_feature_type(self): dataset = self.get_pair(static_files['index_ragged']) results = self.cf.check_feature_type(dataset) self.assertTrue(results.value) dataset = self.get_pair(static_files['bad_data_type']) results = self.cf.check_feature_type(dataset) self.assertFalse(results.value) def test_check_coordinates_and_metadata(self): dataset = self.get_pair(static_files['bad_data_type']) results = self.cf.check_coordinates_and_metadata(dataset) self.assertFalse(results[0].value) self.assertTrue(results[1].value) self.assertFalse(results[2].value) dataset = self.get_pair(static_files['index_ragged']) results = self.cf.check_coordinates_and_metadata(dataset) self.assertTrue(results[-1].value) dataset = self.get_pair(static_files['coordinates_and_metadata']) results = self.cf.check_coordinates_and_metadata(dataset) self.assertTrue(len(results) == 2) self.assertFalse(results[0].value) self.assertFalse(results[1].value) def test_check_missing_data(self): dataset = self.get_pair(static_files['index_ragged']) results = self.cf.check_missing_data(dataset) for each in results: self.assertTrue(each.value) dataset = self.get_pair(static_files['bad_missing_data']) results = self.cf.check_missing_data(dataset) for each in results: self.assertFalse(each.value) def test_check_units(self): ''' Ensure that container variables are not checked for units but geophysical variables are ''' dataset = 
self.get_pair(static_files['units_check']) results = self.cf.check_units(dataset) # We don't keep track of the variables names for checks that passed, so # we can make a strict assertion about how many checks were performed # and if there were errors, which there shouldn't be. scored, out_of, messages = self.get_results(results) assert scored == 4 assert out_of == 4 assert messages == [] def test_64bit(self): dataset = self.get_pair(static_files['ints64']) suite = CheckSuite() suite.checkers = {'cf': CFBaseCheck} suite.run(dataset, 'cf') def test_time_units(self): dataset = self.get_pair(static_files['time_units']) results = self.cf.check_units(dataset) scored, out_of, messages = self.get_results(results) assert u'units are days since 1970-01-01, standard_name units should be K' in messages assert scored == 1 assert out_of == 2 #-------------------------------------------------------------------------------- # Utility Method Tests #-------------------------------------------------------------------------------- def test_temporal_unit_conversion(self): self.assertTrue(units_convertible('hours', 'seconds')) self.assertFalse(units_convertible('hours', 'hours since 2000-01-01')) def test_units_temporal(self): self.assertTrue(units_temporal('hours since 2000-01-01')) self.assertFalse(units_temporal('hours')) self.assertFalse(units_temporal('days since the big bang'))