# (These examples assume the enclosing module imports os, glob, datetime and
# numpy as np, and provides 'basedir' plus the 'nc_files' and 'ncg' helper
# modules used throughout.)

def EG_extract_region():
    # Subset on dimensions, using the Coordinate concept.
    # NOTE: could be done in Iris, but that then also requires CF compliance ??
    input_path = os.path.join(basedir, 'test.nc')
    output_path = os.path.join(basedir, 'test_out.nc')
    depth_start, depth_end = (50.0, 550.0)
    input = nc_files.read(input_path)
    depth_dim = input.dimensions['depth']
    depth_coord = input.variables['depth']
    # Work out indexing to the part we want.
    i_start = np.where(depth_coord[:] >= depth_start)[0][0]
    i_end_indices = np.where(depth_coord[:] >= depth_end)[0]
    if len(i_end_indices):
        # Include the first point at-or-beyond depth_end.
        i_end = i_end_indices[0]
        n_depths = i_end - i_start + 1
        depth_slice = slice(i_start, i_end + 1)
    else:
        # depth_end is beyond the last point: take everything from i_start.
        n_depths = depth_dim.length - i_start
        depth_slice = slice(i_start, None)
    # Adjust the dimension definition.
    depth_dim.length = n_depths
    # Adjust all the referencing variables (N.B. includes the depth coord).
    for var in ncg.all_variables(input):
        if depth_dim in var.dimensions:
            dim_index = var.dimensions.index(depth_dim)
            slices = tuple(depth_slice if index == dim_index else slice(None)
                           for index in range(len(var.dimensions)))
            var.data = var[slices]
    # Write the result.
    nc_files.write(input, output_path)
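# For comparison, a minimal sketch of the same depth-range extraction done
# directly with the netCDF4 library (no ncobj).  This is an illustration,
# not part of the example set above: it assumes a 1-D 'depth' coordinate
# variable, skips scalar variables, and copies variable attributes but not
# group structure.
def EG_extract_region_netcdf4_sketch():
    depth_start, depth_end = (50.0, 550.0)
    in_path = os.path.join(basedir, 'test.nc')
    out_path = os.path.join(basedir, 'test_out.nc')
    with netCDF4.Dataset(in_path) as ds_in, \
            netCDF4.Dataset(out_path, 'w') as ds_out:
        depths = ds_in.variables['depth'][:]
        keep = np.where((depths >= depth_start) & (depths <= depth_end))[0]
        i_start, i_stop = keep[0], keep[-1] + 1
        # Re-create dimensions, shrinking 'depth'.
        for name, dim in ds_in.dimensions.items():
            length = (i_stop - i_start) if name == 'depth' else len(dim)
            ds_out.createDimension(name, length)
        # Copy variables, slicing any that use the 'depth' dimension.
        for name, var in ds_in.variables.items():
            if not var.dimensions:
                continue  # scalar variables omitted for brevity
            var_out = ds_out.createVariable(name, var.dtype, var.dimensions)
            var_out.setncatts(
                {att: var.getncattr(att) for att in var.ncattrs()})
            if 'depth' in var.dimensions:
                i_dim = var.dimensions.index('depth')
                slices = tuple(slice(i_start, i_stop) if i == i_dim
                               else slice(None) for i in range(var.ndim))
                var_out[:] = var[slices]
            else:
                var_out[:] = var[:]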
def test_cdl__ncdump_match(self):
    ncdump_output = _complex_cdl[:]
    g = _make_complex_group()
    file_dirpath = tempfile.mkdtemp()
    try:
        file_path = os.path.join(file_dirpath, 'temp.nc')
        with nc4.Dataset(file_path, 'w') as ds:
            ncf.write(ds, g)
        results = subprocess.check_output(
            'ncdump -h ' + file_path, shell=True)
    finally:
        shutil.rmtree(file_dirpath)
    expected = strip_lines(ncdump_output)
    found = strip_lines(results)
    self.assertEqual(found, expected)
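# 'strip_lines' is defined elsewhere in the test module.  A plausible
# minimal form (an assumption, not the actual implementation) normalises
# ncdump output for comparison: decode bytes, trim whitespace, drop blanks.
def strip_lines(text):
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    return [line.strip() for line in text.splitlines() if line.strip()]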
def EG_some_variables():
    # Copy only certain variables to the output file.
    input_path = os.path.join(basedir, 'test.nc')
    output_path = os.path.join(basedir, 'test_out.nc')
    # Read input.
    nco = nc_files.read(input_path)
    # Copy only the selected variables into a blank output group.
    output = Group()
    varnames = ('temp', 'depth')
    for var in ncg.all_variables(nco):
        if var.name in varnames:
            output.variables.add(var)
    # Write out.
    nc_files.write(output, output_path)
def EG_filter_variables():
    # Filter variables, removing some, and flatten the structure.
    input_path = os.path.join(basedir, 'test.nc')
    output_path = os.path.join(basedir, 'test_out.nc')
    # Read input.
    input = nc_files.read(input_path)
    # Make blank output.
    output = Group()
    varname_parts_only = ('temp', 'depth')
    varname_ends_exclude = ('_QC', '_stats')
    # Copy selected variables.
    for var in ncg.all_variables(input):
        if (any(part in var.name for part in varname_parts_only)
                and not any(var.name.endswith(part)
                            for part in varname_ends_exclude)):
            output.variables.add(var)
    # Write out.
    nc_files.write(output, output_path)
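# A quick check of the filtering rule above, on some hypothetical variable
# names: a name must contain 'temp' or 'depth', and must not end in '_QC'
# or '_stats'.
for name in ('sea_water_temp', 'depth', 'temp_QC', 'depth_stats', 'salinity'):
    keep = (any(part in name for part in ('temp', 'depth'))
            and not name.endswith(('_QC', '_stats')))
    print(name, '->', 'keep' if keep else 'drop')
# Expected: 'sea_water_temp' and 'depth' kept, the rest dropped.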
def EG_flatten():
    # Copy certain information to the output file, losing existing structure.
    input_path = os.path.join(basedir, 'test.nc')
    output_path = os.path.join(basedir, 'test_out.nc')
    # Read input.
    input = nc_files.read(input_path)
    # Make blank output.
    output = Group()
    # Partial copy.
    output.groups.add(input.groups['grid_dims'])
    output.variables.add_all(ncg.all_variables(input.groups['group_a']))
    output.variables.add_all(ncg.all_variables(input.groups['group_Q']))
    # Write out.
    # N.B. relevant top-level dimensions etc. will be re-created,
    # but this 'flattens' everything into the root group.
    nc_files.write(output, output_path)
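# Flattening can collide names: if 'group_a' and 'group_Q' both contain a
# variable with the same name, the second 'add_all' above would introduce a
# duplicate.  A defensive variant of the partial copy, tracking names in a
# plain set (a sketch -- the real ncobj containers may well have their own
# duplicate handling):
def _copy_variables_no_clash(output, input, group_names):
    seen = set()
    for group_name in group_names:
        for var in ncg.all_variables(input.groups[group_name]):
            if var.name not in seen:
                seen.add(var.name)
                output.variables.add(var)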
def test_simple_to_path(self):
    test_outfile_name = 'test_simple.nc'
    test_outfile_path = os.path.join(testdata_dirpath, test_outfile_name)
    array = np.arange(4)
    var = ov('v1', dd=[od('x')], aa=[oa('v_att', 'this')], data=array)
    g = og('', vv=[var])
    self.assertFalse(ncg.has_no_missing_dims(g))
    try:
        write(test_outfile_path, g)
        self.assertTrue(ncg.has_no_missing_dims(g))
        # Read it back again and check.
        with netCDF4.Dataset(test_outfile_path) as ds:
            g_back = read(ds)
        self.assertEqual(g_back, g)
    finally:
        if not leave_output:
            # Remove the temporary file.
            if os.path.exists(test_outfile_path):
                os.remove(test_outfile_path)
def test_add_to_dataset(self):
    test_outfile_name = 'test_add.nc'
    test_outfile_path = os.path.join(testdata_dirpath, test_outfile_name)
    try:
        ds = netCDF4.Dataset(test_outfile_path, 'w')
        ds.setncattr('extra', 4.5)
        g = og('', vv=[ov('v1',
                          aa=[oa('var_attr', 'this_value')],
                          data=np.array(3.2))])
        write(ds, g)
        ds.close()
        # Read it back again and check.
        with netCDF4.Dataset(test_outfile_path) as ds:
            g = read(ds)
        self.assertEqual(g.attributes['extra'].value, 4.5)
        self.assertEqual(list(g.dimensions), [])
        self.assertEqual(
            g.variables['v1'].attributes['var_attr'].value, 'this_value')
    finally:
        if not leave_output:
            if os.path.exists(test_outfile_path):
                os.remove(test_outfile_path)
def check_file_cdl(file_path):
    # Load from the given file.
    with netCDF4.Dataset(file_path) as ds:
        g = ncds.read(ds)
    # Re-save to a temporary file, to get an ncdump output in known order.
    ncds.write('temp.nc', g)
    # Rename so the group name matches the temporary file name, as seen by
    # ncdump.
    g.rename('temp')
    # Generate CDL.
    g_cdl = cdl(g)
    try:
        # Run ncdump on the test file.
        f_cdl = subprocess.check_output('ncdump -h temp.nc', shell=True)
    finally:
        os.remove('temp.nc')
    # check_output returns bytes under Python 3: decode before comparing.
    if isinstance(f_cdl, bytes):
        f_cdl = f_cdl.decode('utf-8')
    # Check that the two CDL strings are "essentially" the same.
    g_cdl_std = ncdl.comparable_cdl(g_cdl)
    f_cdl_std = ncdl.comparable_cdl(f_cdl)
    if g_cdl_std == f_cdl_std:
        print('OK (ncdump and ncobj output EQUAL)')
    else:
        print('FAIL: ncdump and ncobj output differ..')
    print()
def EG_extract_combine():
    # Combine selected data, a month's worth from each of several files.
    input_paths = glob.glob(os.path.join(basedir, 'test_sequence_*.nc'))
    output_path = os.path.join(basedir, 'test_out.nc')
    desired_var_names = ('air_temperature', 'surface_pressure')

    # Create a suitable empty output structure.
    output = Group()
    output.dimensions.add(Dimension('time', length=0))
    output_time_dim = output.dimensions['time']
    output.variables.add(Variable('time', dimensions=output_time_dim))
    output_time_var = output.variables['time']
    # N.B. the time 'units' attribute and data array are added later,
    # copied from the first input file.

    # Sort input filenames for correct data-times ordering.
    input_paths = sorted(input_paths)

    # Get whole months from the input files in name order, and concatenate
    # them onto the output.
    first_file = True
    for input_path in input_paths:
        input = nc_files.read(input_path)
        # Get input file time information.
        time_var = input.variables['time']
        time_units = time_var.attributes['units'].value
        time_dts = iris.Unit(time_units).num2date(time_var[:])
        monthdays = np.array([dt.day for dt in time_dts])
        if first_file:
            first_file = False
            # First time through, establish the time base at the first
            # month start.
            time_next = time_dts[np.where(monthdays == 1)[0][0]]
            # First time through, initialise the time variable units + data
            # (empty).
            output_time_var.attributes.add(
                Attribute(name='units', value=time_units))
            output_time_var.data = np.array([])
            # First time through, create output variables matching input.
            for varname in desired_var_names:
                output.variables[varname] = input.variables[varname]
                var = output.variables[varname]
                assert var.dimensions[0].name == 'time'
                # Change the first dimension to the output time dimension
                # (current length=0).
                var.dimensions[0] = output_time_dim
                dim_lens = [dim.length for dim in var.dimensions]
                assert dim_lens[0] == 0
                # Assign a data array of the correct dimensions (initially
                # empty).
                var.data = np.zeros(dim_lens)
        # Get the index of the month start.
        month_start = np.where(time_dts == time_next)[0][0]
        # Calculate the next start date.
        is_december = time_next.month == 12
        time_next = datetime.datetime(
            year=time_next.year + 1 if is_december else time_next.year,
            month=1 if is_december else time_next.month + 1,
            day=1)
        # Get the index of the month end, or None if not in this file
        # (in which case the slice runs to the end of the file).
        next_months = np.where(time_dts == time_next)[0]
        month_end = next_months[0] if len(next_months) else None
        # Slice a month from the desired variables and copy to the output.
        for varname in desired_var_names:
            var = input.variables[varname]
            assert var.dimensions[0].name == 'time'
            # Note: this array concatenation is crude + expensive in memory.
            # This is where we really need deferred data management.
            output.variables[varname].data = np.concatenate(
                (output.variables[varname][:], var[month_start:month_end]),
                axis=0)
            # # NOTE: a cool one-line version, that may not be possible ...
            # output.variables[varname][N:] = var[month_start:month_end]
            # # (N.B. you can do this with lists, but *not* numpy arrays)
            # # Alternatively, we could have a method such as 'extend'?
            # # (see the sketch after this function)
        # Extend the time coord variable appropriately.
        # NOTE: with missing data, the time values sequence will show gaps.
        output_time_var.data = np.concatenate(
            (output_time_var[:], time_var[month_start:month_end]),
            axis=0)

    # Write the output file.
    nc_files.write(output, output_path)
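# A helper along the lines of the 'extend' method wished for in the comments
# above -- purely illustrative, not part of the ncobj API.  It appends a
# block to a variable's data array along axis 0 with np.concatenate, so it
# has the same memory cost as the inline version; real deferred data
# management would avoid materialising both arrays at once.
def extend_variable(var, new_data):
    var.data = np.concatenate((var.data, new_data), axis=0)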
g = og('',
       aa=[oa('root_attr_vec', np.array([1.2, 3, 4]))],
       dd=[od('root_dim_x', 2)],
       vv=[ov('root_var_1',
              dd=[od('root_dim_x')],
              aa=[oa('root_var_attr_1', 11)],
              data=np.zeros(2)),
           ov('root_var_2_scalar',
              data=np.array(3.15, dtype=np.float32))],
       gg=[og('subgroup',
              aa=[oa('subgroup_attr', 'qq')],
              dd=[od('subgroup_dim_y', 3)],
              vv=[ov('subgroup_var',
                     dd=[od('root_dim_x'), od('subgroup_dim_y')],
                     aa=[oa('subgroup_var_attr', 57.31)],
                     data=np.zeros((2, 3)))],
              gg=[og('sub_sub_group',
                     aa=[oa('sub_sub_group_attr', 'this')],
                     vv=[ov('sub_sub_group_var',
                            dd=[od('subgroup_dim_y')],
                            data=np.zeros(3))])]),
           og('sg_2_empty')])
ncg.complete(g)
print(g)
print()
file_path = './temp.nc'
with nc4.Dataset(file_path, 'w') as ds:
    ncf.write(ds, g)
print(os.system('ncdump -h temp.nc'))