def test_rename(self):
    # Rename one variable and one dimension, then compare every variable of
    # the renamed dataset against an expectation built by hand.
    data = create_test_data()
    newnames = {'var1': 'renamed_var1', 'dim2': 'renamed_dim2'}
    renamed = data.rename(newnames)

    # Re-key the original variables under their new names.
    expected_vars = OrderedDict(data.variables)
    for old_key, new_key in iteritems(newnames):
        expected_vars[new_key] = expected_vars.pop(old_key)

    for key, original in iteritems(expected_vars):
        # Apply the same renames to this variable's dimension names.
        expected_dims = list(original.dimensions)
        for old_dim, new_dim in iteritems(newnames):
            if old_dim in expected_dims:
                expected_dims[expected_dims.index(old_dim)] = new_dim

        result = renamed.variables[key]
        self.assertVariableEqual(
            Variable(expected_dims, original.values, original.attrs),
            result)
        self.assertEqual(original.encoding, result.encoding)
        self.assertEqual(type(original), type(result))

    # The old names must be gone from the renamed dataset.
    for old_name in ('var1', 'dim2'):
        self.assertTrue(old_name not in renamed.variables)

    with self.assertRaisesRegexp(ValueError, "cannot rename 'not_a_var'"):
        data.rename({'not_a_var': 'nada'})

    # verify that we can rename a variable without accessing the data
    source_var = data['var1']
    data['var1'] = (source_var.dimensions,
                    InaccessibleArray(source_var.values))
    renamed = data.rename(newnames)
    with self.assertRaises(UnexpectedDataAccess):
        renamed['renamed_var1'].values
def set_variable(self, name, variable):
    """Encode ``variable`` and create it as ``name`` in ``self.ds``.

    The variable is CF-encoded first; when ``self.format`` is not
    ``'NETCDF4'`` it is additionally passed through
    ``encode_nc3_variable``. Creation options are read from
    ``variable.encoding`` and the remaining attributes are copied onto the
    new variable.
    """
    variable = encode_cf_variable(variable)
    if self.format != 'NETCDF4':
        variable = encode_nc3_variable(variable)
        values, datatype = variable.values, variable.dtype
    else:
        values, datatype = _nc4_values_and_dtype(variable)

    self.set_necessary_dimensions(variable)

    # '_FillValue' is handed to createVariable rather than being set as a
    # regular attribute, so it is removed from attrs here.
    fill_value = variable.attrs.pop('_FillValue', None)
    encoding = variable.encoding
    create_kwargs = dict(
        varname=name,
        datatype=datatype,
        dimensions=variable.dimensions,
        zlib=encoding.get('zlib', False),
        complevel=encoding.get('complevel', 4),
        shuffle=encoding.get('shuffle', True),
        fletcher32=encoding.get('fletcher32', False),
        contiguous=encoding.get('contiguous', False),
        chunksizes=encoding.get('chunksizes'),
        endian=encoding.get('endian', 'native'),
        least_significant_digit=encoding.get('least_significant_digit'),
        fill_value=fill_value)
    nc4_var = self.ds.createVariable(**create_kwargs)
    nc4_var.set_auto_maskandscale(False)
    nc4_var[:] = values

    # set attributes one-by-one since netCDF4<1.0.10 can't handle
    # OrderedDict as the input to setncatts
    for attr_name, attr_value in iteritems(variable.attrs):
        nc4_var.setncattr(attr_name, attr_value)
def test_open_encodings(self):
    # Create a netCDF file with explicit time units
    # and make sure it makes it into the encodings
    # and survives a round trip
    units = 'days since 1999-01-01'
    with create_tmp_file() as tmp_file:
        # Write the raw file directly with the netCDF4 library.
        ds = nc4.Dataset(tmp_file, 'w')
        ds.createDimension('time', size=10)
        ds.createVariable('time', np.int32, dimensions=('time',))
        raw_time = ds.variables['time']
        raw_time.setncattr('units', units)
        raw_time[:] = np.arange(10) + 4
        ds.close()

        # Expected dataset: the same offsets expressed as dates.
        expected = Dataset()
        time = pd.date_range('1999-01-05', periods=10)
        encoding = {'units': units, 'dtype': np.dtype('int32')}
        expected['time'] = ('time', time, {}, encoding)

        actual = open_dataset(tmp_file)

        self.assertVariableEqual(actual['time'], expected['time'])

        # Only compare the encoding keys the expectation defines.
        expected_encoding = expected['time'].encoding
        actual_encoding = {}
        for key, value in iteritems(actual['time'].encoding):
            if key in expected_encoding:
                actual_encoding[key] = value
        self.assertDictEqual(actual_encoding, expected['time'].encoding)
def test_open_encodings(self):
    # Create a netCDF file with explicit time units
    # and make sure it makes it into the encodings
    # and survives a round trip
    units = 'days since 1999-01-01'
    with create_tmp_file() as tmp_file:
        # Write the raw file directly with the netCDF4 library.
        ds = nc4.Dataset(tmp_file, 'w')
        ds.createDimension('time', size=10)
        ds.createVariable('time', np.int32, dimensions=('time', ))
        raw_time = ds.variables['time']
        raw_time.setncattr('units', units)
        raw_time[:] = np.arange(10) + 4
        ds.close()

        # Expected dataset: the same offsets expressed as dates.
        expected = Dataset()
        time = pd.date_range('1999-01-05', periods=10)
        encoding = {'units': units, 'dtype': np.dtype('int32')}
        expected['time'] = ('time', time, {}, encoding)

        actual = open_dataset(tmp_file)

        self.assertVariableEqual(actual['time'], expected['time'])

        # Only compare the encoding keys the expectation defines.
        expected_encoding = expected['time'].encoding
        actual_encoding = {}
        for key, value in iteritems(actual['time'].encoding):
            if key in expected_encoding:
                actual_encoding[key] = value
        self.assertDictEqual(actual_encoding, expected['time'].encoding)
def test_compression_encoding(self):
    # Every encoding entry set on 'var2' must still be present, with the
    # same value, after a round trip.
    data = create_test_data()
    compression = {'zlib': True,
                   'chunksizes': (10, 10),
                   'least_significant_digit': 2}
    data['var2'].encoding.update(compression)

    actual = self.roundtrip(data)

    for key, expected_value in iteritems(data['var2'].encoding):
        self.assertEqual(expected_value, actual['var2'].encoding[key])
def rectify_dim_order(dataset):
    """Return a new dataset with each variable transposed to match ``data``.

    ``data`` is not a parameter: it is resolved from the enclosing scope,
    and supplies the dimension order for every variable name.
    """
    reordered = {}
    for name, variable in iteritems(dataset.variables):
        reordered[name] = variable.transpose(*data[name].dimensions)
    return Dataset(reordered, dataset.attrs)
def test_compression_encoding(self):
    # Every encoding entry set on 'var2' must still be present, with the
    # same value, after a round trip.
    data = create_test_data()
    compression = {'zlib': True,
                   'chunksizes': (10, 10),
                   'least_significant_digit': 2}
    data['var2'].encoding.update(compression)

    actual = self.roundtrip(data)

    for key, expected_value in iteritems(data['var2'].encoding):
        self.assertEqual(expected_value, actual['var2'].encoding[key])
def test_squeeze(self):
    # Squeezing the dataset must match squeezing each variable on its own.
    data = Dataset({'foo': (['x', 'y', 'z'], [[[1], [2]]])})
    for args in [[], [['x']], [['x', 'z']]]:
        def squeeze_args(variable):
            # Restrict the requested dimensions to those this variable has;
            # with no request, squeeze is called without arguments.
            if not args:
                return []
            return [set(args[0]) & set(variable.dimensions)]

        squeezed = {}
        for name, variable in iteritems(data.variables):
            squeezed[name] = variable.squeeze(*squeeze_args(variable))
        expected = Dataset(squeezed)
        self.assertDatasetIdentical(expected, data.squeeze(*args))

    # invalid squeeze
    with self.assertRaisesRegexp(ValueError, 'cannot select a dimension'):
        data.squeeze('y')
def set_variable(self, name, variable):
    """Encode ``variable`` and create it as ``name`` in ``self.ds``.

    The variable is CF-encoded and then netCDF3-encoded before its values
    and attributes are written.
    """
    cf_encoded = conventions.encode_cf_variable(variable)
    variable = encode_nc3_variable(cf_encoded)
    self.set_necessary_dimensions(variable)

    data = variable.values
    self.ds.createVariable(name, data.dtype, variable.dimensions)
    scipy_var = self.ds.variables[name]

    # Zero-dimensional data is written with assignValue instead of slicing.
    if data.ndim != 0:
        scipy_var[:] = data[:]
    else:
        scipy_var.assignValue(data)

    for attr_name, attr_value in iteritems(variable.attrs):
        self._validate_attr_key(attr_name)
        setattr(scipy_var, attr_name, self._cast_attr_value(attr_value))
def test_squeeze(self):
    # Squeezing the dataset must match squeezing each variable on its own.
    data = Dataset({'foo': (['x', 'y', 'z'], [[[1], [2]]])})
    for args in [[], [['x']], [['x', 'z']]]:
        def squeeze_args(variable):
            # Restrict the requested dimensions to those this variable has;
            # with no request, squeeze is called without arguments.
            if not args:
                return []
            return [set(args[0]) & set(variable.dimensions)]

        squeezed = {}
        for name, variable in iteritems(data.variables):
            squeezed[name] = variable.squeeze(*squeeze_args(variable))
        expected = Dataset(squeezed)
        self.assertDatasetIdentical(expected, data.squeeze(*args))

    # invalid squeeze
    with self.assertRaisesRegexp(ValueError, 'cannot select a dimension'):
        data.squeeze('y')
def test_indexed(self):
    # Index the test dataset with slices and integers and check the
    # resulting dimensions, values and non-coordinate variables.
    data = create_test_data()
    slicers = {'dim1': slice(None, None, 2), 'dim2': slice(0, 2)}
    ret = data.indexed(**slicers)

    # Verify that only the specified dimension was altered
    self.assertItemsEqual(data.dimensions, ret.dimensions)
    for d in data.dimensions:
        if d in slicers:
            self.assertEqual(
                ret.dimensions[d],
                np.arange(data.dimensions[d])[slicers[d]].size)
        else:
            self.assertEqual(data.dimensions[d], ret.dimensions[d])

    # Verify that the data is what we expect
    for v in data.variables:
        self.assertEqual(data[v].dimensions, ret[v].dimensions)
        self.assertEqual(data[v].attrs, ret[v].attrs)
        # Build one slice per axis, replacing the axes that are sliced.
        slice_list = [slice(None)] * data[v].values.ndim
        for d, s in iteritems(slicers):
            if d in data[v].dimensions:
                inds = np.nonzero(np.array(data[v].dimensions) == d)[0]
                for ind in inds:
                    slice_list[ind] = s
        # A multidimensional index must be a tuple: NumPy no longer
        # interprets a list of slices as one index per axis.
        expected = data[v].values[tuple(slice_list)]
        actual = ret[v].values
        np.testing.assert_array_equal(expected, actual)

    with self.assertRaises(ValueError):
        data.indexed(not_a_dim=slice(0, 2))

    ret = data.indexed(dim1=0)
    self.assertEqual({'time': 20, 'dim2': 50, 'dim3': 10}, ret.dimensions)
    self.assertItemsEqual(
        list(data.noncoordinates) + ['dim1'], ret.noncoordinates)

    ret = data.indexed(time=slice(2), dim1=0, dim2=slice(5))
    self.assertEqual({'time': 2, 'dim2': 5, 'dim3': 10}, ret.dimensions)
    self.assertItemsEqual(
        list(data.noncoordinates) + ['dim1'], ret.noncoordinates)

    ret = data.indexed(time=0, dim1=0, dim2=slice(5))
    # assertEqual (not assertItemsEqual) so the dimension sizes are
    # checked too; assertItemsEqual on a dict only compares its keys.
    self.assertEqual({'dim2': 5, 'dim3': 10}, ret.dimensions)
    self.assertItemsEqual(
        list(data.noncoordinates) + ['dim1', 'time'], ret.noncoordinates)
def test_indexed(self):
    # Index the test dataset with slices and integers and check the
    # resulting dimensions, values and non-coordinate variables.
    data = create_test_data()
    slicers = {'dim1': slice(None, None, 2), 'dim2': slice(0, 2)}
    ret = data.indexed(**slicers)

    # Verify that only the specified dimension was altered
    self.assertItemsEqual(data.dimensions, ret.dimensions)
    for d in data.dimensions:
        if d in slicers:
            self.assertEqual(ret.dimensions[d],
                             np.arange(data.dimensions[d])[slicers[d]].size)
        else:
            self.assertEqual(data.dimensions[d], ret.dimensions[d])

    # Verify that the data is what we expect
    for v in data.variables:
        self.assertEqual(data[v].dimensions, ret[v].dimensions)
        self.assertEqual(data[v].attrs, ret[v].attrs)
        # Build one slice per axis, replacing the axes that are sliced.
        slice_list = [slice(None)] * data[v].values.ndim
        for d, s in iteritems(slicers):
            if d in data[v].dimensions:
                inds = np.nonzero(np.array(data[v].dimensions) == d)[0]
                for ind in inds:
                    slice_list[ind] = s
        # A multidimensional index must be a tuple: NumPy no longer
        # interprets a list of slices as one index per axis.
        expected = data[v].values[tuple(slice_list)]
        actual = ret[v].values
        np.testing.assert_array_equal(expected, actual)

    with self.assertRaises(ValueError):
        data.indexed(not_a_dim=slice(0, 2))

    ret = data.indexed(dim1=0)
    self.assertEqual({'time': 20, 'dim2': 50, 'dim3': 10}, ret.dimensions)
    self.assertItemsEqual(list(data.noncoordinates) + ['dim1'],
                          ret.noncoordinates)

    ret = data.indexed(time=slice(2), dim1=0, dim2=slice(5))
    self.assertEqual({'time': 2, 'dim2': 5, 'dim3': 10}, ret.dimensions)
    self.assertItemsEqual(list(data.noncoordinates) + ['dim1'],
                          ret.noncoordinates)

    ret = data.indexed(time=0, dim1=0, dim2=slice(5))
    # assertEqual (not assertItemsEqual) so the dimension sizes are
    # checked too; assertItemsEqual on a dict only compares its keys.
    self.assertEqual({'dim2': 5, 'dim3': 10}, ret.dimensions)
    self.assertItemsEqual(list(data.noncoordinates) + ['dim1', 'time'],
                          ret.noncoordinates)
def test_properties(self):
    # Compare the fixture's ``dv`` against the ``v`` and ``ds`` fixtures it
    # is expected to mirror, then check the read-only attributes.
    self.assertDatasetIdentical(self.dv.dataset, self.ds)
    self.assertEqual(self.dv.name, 'foo')
    self.assertVariableEqual(self.dv.variable, self.v)
    self.assertArrayEqual(self.dv.values, self.v.values)

    mirrored_attrs = ('dimensions', 'dtype', 'shape', 'size', 'ndim',
                      'attrs')
    for attr_name in mirrored_attrs:
        self.assertEqual(getattr(self.dv, attr_name),
                         getattr(self.v, attr_name))
    self.assertEqual(len(self.dv), len(self.v))
    self.assertVariableEqual(self.dv, self.v)

    self.assertEqual(list(self.dv.coordinates), list(self.ds.coordinates))
    for coord_name, coord in iteritems(self.dv.coordinates):
        self.assertArrayEqual(coord, self.ds.coordinates[coord_name])

    # Assigning to name or dataset must be rejected.
    with self.assertRaises(AttributeError):
        self.dv.name = 'bar'
    with self.assertRaises(AttributeError):
        self.dv.dataset = self.ds

    self.assertIsInstance(self.ds['x'].as_index, pd.Index)
    with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
        self.ds['foo'].as_index
def variables(self):
    """Return a ``FrozenOrderedDict`` of opened store variables.

    Keys are the store's variable names passed through
    ``_decode_variable_name``; values come from ``open_store_variable``.
    """
    opened = (
        (_decode_variable_name(name), self.open_store_variable(stored))
        for name, stored in iteritems(self.store_variables)
    )
    return FrozenOrderedDict(opened)
def set_variables(self, variables):
    """Call ``set_variable`` for every ``(name, variable)`` item."""
    for var_name, var in iteritems(variables):
        self.set_variable(var_name, var)
def dimensions(self):
    """Return a ``FrozenOrderedDict`` mapping dimension name to length."""
    lengths = (
        (dim_name, len(dim))
        for dim_name, dim in iteritems(self.ds.dimensions)
    )
    return FrozenOrderedDict(lengths)
def rectify_dim_order(dataset):
    """Return a new dataset with each variable transposed to match ``data``.

    ``data`` is not a parameter: it is resolved from the enclosing scope,
    and supplies the dimension order for every variable name.
    """
    reordered = {}
    for name, variable in iteritems(dataset.variables):
        reordered[name] = variable.transpose(*data[name].dimensions)
    return Dataset(reordered, dataset.attrs)
def test_concat(self):
    # Concatenate pieces of the test dataset back together in several ways
    # and check the result against the original, then check error cases.
    data = create_test_data()

    split_data = [data.indexed(dim1=slice(10)),
                  data.indexed(dim1=slice(10, None))]
    self.assertDatasetIdentical(data, Dataset.concat(split_data, 'dim1'))

    def rectify_dim_order(dataset):
        # return a new dataset with all variable dimensions transposed into
        # the order in which they are found in `data`
        reordered = {}
        for name, variable in iteritems(dataset.variables):
            reordered[name] = variable.transpose(*data[name].dimensions)
        return Dataset(reordered, dataset.attrs)

    def grouped(dim, squeeze):
        # the per-group datasets obtained by grouping `data` over `dim`
        return [group for _, group in data.groupby(dim, squeeze=squeeze)]

    dim_names = ('dim1', 'dim2', 'dim3')

    for dim in dim_names:
        datasets = grouped(dim, False)
        self.assertDatasetIdentical(data, Dataset.concat(datasets, dim))
        self.assertDatasetIdentical(
            data, Dataset.concat(datasets, data[dim]))
        self.assertDatasetIdentical(
            data, Dataset.concat(datasets, data[dim], mode='minimal'))

        datasets = grouped(dim, True)
        concat_over = []
        for name, variable in iteritems(data.variables):
            if dim in variable.dimensions and name != dim:
                concat_over.append(name)
        actual = Dataset.concat(datasets, data[dim],
                                concat_over=concat_over)
        self.assertDatasetIdentical(data, rectify_dim_order(actual))

        actual = Dataset.concat(datasets, data[dim], mode='different')
        self.assertDatasetIdentical(data, rectify_dim_order(actual))

    # Now add a new variable that doesn't depend on any of the current
    # dims and make sure the mode argument behaves as expected
    data['var4'] = ('dim4', np.arange(data.dimensions['dim3']))
    for dim in dim_names:
        datasets = grouped(dim, False)
        actual = Dataset.concat(datasets, data[dim], mode='all')
        expected = np.array([data['var4'].values
                             for _ in range(data.dimensions[dim])])
        self.assertArrayEqual(actual['var4'].values, expected)

        for mode in ('different', 'minimal'):
            actual = Dataset.concat(datasets, data[dim], mode=mode)
            self.assertDataArrayEqual(data['var4'], actual['var4'])

    # verify that the dimension argument takes precedence over
    # concatenating dataset variables of the same name
    dimension = (2 * data['dim1']).rename('dim1')
    datasets = grouped('dim1', False)
    expected = data.copy()
    expected['dim1'] = dimension
    self.assertDatasetIdentical(
        expected, Dataset.concat(datasets, dimension))

    # TODO: factor this into several distinct tests
    data = create_test_data()
    split_data = [data.indexed(dim1=slice(10)),
                  data.indexed(dim1=slice(10, None))]

    with self.assertRaisesRegexp(ValueError, 'must supply at least one'):
        Dataset.concat([], 'dim1')

    with self.assertRaisesRegexp(ValueError, 'not all elements in'):
        Dataset.concat(split_data, 'dim1', concat_over=['not_found'])

    with self.assertRaisesRegexp(ValueError, 'global attributes not'):
        data0, data1 = deepcopy(split_data)
        data1.attrs['foo'] = 'bar'
        Dataset.concat([data0, data1], 'dim1', compat='identical')
    # The same pair must still concatenate under the looser 'equals' check.
    self.assertDatasetIdentical(
        data, Dataset.concat([data0, data1], 'dim1', compat='equals'))

    with self.assertRaisesRegexp(ValueError, 'encountered unexpected'):
        data0, data1 = deepcopy(split_data)
        data1['foo'] = ('bar', np.random.randn(10))
        Dataset.concat([data0, data1], 'dim1')

    with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
        data0, data1 = deepcopy(split_data)
        data1['dim2'] = 2 * data1['dim2']
        Dataset.concat([data0, data1], 'dim1')
def dimensions(self):
    """Return a ``FrozenOrderedDict`` mapping dimension name to length."""
    lengths = (
        (dim_name, len(dim))
        for dim_name, dim in iteritems(self.ds.dimensions)
    )
    return FrozenOrderedDict(lengths)
def _decode_attrs(d):
    """Return an ``OrderedDict`` of ``d`` with values string-decoded.

    The ``'_FillValue'`` entry is copied through untouched.
    """
    # don't decode _FillValue from bytes -> unicode, because we want to ensure
    # that its type matches the data exactly
    decoded = OrderedDict()
    for key, value in iteritems(d):
        if key == '_FillValue':
            decoded[key] = value
        else:
            decoded[key] = _decode_string(value)
    return decoded
def variables(self):
    """Return a ``FrozenOrderedDict`` of opened store variables.

    Keys are the store's variable names passed through
    ``_decode_variable_name``; values come from ``open_store_variable``.
    """
    opened = (
        (_decode_variable_name(name), self.open_store_variable(stored))
        for name, stored in iteritems(self.store_variables)
    )
    return FrozenOrderedDict(opened)
def set_variables(self, variables):
    """Call ``set_variable`` for every item, encoding each name first."""
    for var_name, var in iteritems(variables):
        encoded_name = _encode_variable_name(var_name)
        self.set_variable(encoded_name, var)
def set_attributes(self, attributes):
    """Call ``set_attribute`` for every ``(name, value)`` item."""
    for attr_name, attr_value in iteritems(attributes):
        self.set_attribute(attr_name, attr_value)
def variables(self):
    """Return a ``FrozenOrderedDict`` of opened store variables by name."""
    opened = (
        (name, self.open_store_variable(stored))
        for name, stored in iteritems(self.store_variables)
    )
    return FrozenOrderedDict(opened)
def test_concat(self):
    # Concatenate pieces of the test dataset back together in several ways
    # and check the result against the original, then check error cases.
    data = create_test_data()

    split_data = [
        data.indexed(dim1=slice(10)),
        data.indexed(dim1=slice(10, None))
    ]
    self.assertDatasetIdentical(data, Dataset.concat(split_data, 'dim1'))

    def rectify_dim_order(dataset):
        # return a new dataset with all variable dimensions transposed into
        # the order in which they are found in `data`
        reordered = {}
        for name, variable in iteritems(dataset.variables):
            reordered[name] = variable.transpose(*data[name].dimensions)
        return Dataset(reordered, dataset.attrs)

    def grouped(dim, squeeze):
        # the per-group datasets obtained by grouping `data` over `dim`
        return [group for _, group in data.groupby(dim, squeeze=squeeze)]

    dim_names = ('dim1', 'dim2', 'dim3')

    for dim in dim_names:
        datasets = grouped(dim, False)
        self.assertDatasetIdentical(data, Dataset.concat(datasets, dim))
        self.assertDatasetIdentical(data,
                                    Dataset.concat(datasets, data[dim]))
        self.assertDatasetIdentical(
            data, Dataset.concat(datasets, data[dim], mode='minimal'))

        datasets = grouped(dim, True)
        concat_over = []
        for name, variable in iteritems(data.variables):
            if dim in variable.dimensions and name != dim:
                concat_over.append(name)
        actual = Dataset.concat(datasets, data[dim],
                                concat_over=concat_over)
        self.assertDatasetIdentical(data, rectify_dim_order(actual))

        actual = Dataset.concat(datasets, data[dim], mode='different')
        self.assertDatasetIdentical(data, rectify_dim_order(actual))

    # Now add a new variable that doesn't depend on any of the current
    # dims and make sure the mode argument behaves as expected
    data['var4'] = ('dim4', np.arange(data.dimensions['dim3']))
    for dim in dim_names:
        datasets = grouped(dim, False)
        actual = Dataset.concat(datasets, data[dim], mode='all')
        expected = np.array(
            [data['var4'].values for _ in range(data.dimensions[dim])])
        self.assertArrayEqual(actual['var4'].values, expected)

        for mode in ('different', 'minimal'):
            actual = Dataset.concat(datasets, data[dim], mode=mode)
            self.assertDataArrayEqual(data['var4'], actual['var4'])

    # verify that the dimension argument takes precedence over
    # concatenating dataset variables of the same name
    dimension = (2 * data['dim1']).rename('dim1')
    datasets = grouped('dim1', False)
    expected = data.copy()
    expected['dim1'] = dimension
    self.assertDatasetIdentical(expected,
                                Dataset.concat(datasets, dimension))

    # TODO: factor this into several distinct tests
    data = create_test_data()
    split_data = [
        data.indexed(dim1=slice(10)),
        data.indexed(dim1=slice(10, None))
    ]

    with self.assertRaisesRegexp(ValueError, 'must supply at least one'):
        Dataset.concat([], 'dim1')

    with self.assertRaisesRegexp(ValueError, 'not all elements in'):
        Dataset.concat(split_data, 'dim1', concat_over=['not_found'])

    with self.assertRaisesRegexp(ValueError, 'global attributes not'):
        data0, data1 = deepcopy(split_data)
        data1.attrs['foo'] = 'bar'
        Dataset.concat([data0, data1], 'dim1', compat='identical')
    # The same pair must still concatenate under the looser 'equals' check.
    self.assertDatasetIdentical(
        data, Dataset.concat([data0, data1], 'dim1', compat='equals'))

    with self.assertRaisesRegexp(ValueError, 'encountered unexpected'):
        data0, data1 = deepcopy(split_data)
        data1['foo'] = ('bar', np.random.randn(10))
        Dataset.concat([data0, data1], 'dim1')

    with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
        data0, data1 = deepcopy(split_data)
        data1['dim2'] = 2 * data1['dim2']
        Dataset.concat([data0, data1], 'dim1')
def _decode_values(d):
    """Return an ``OrderedDict`` of ``d`` with every value string-decoded."""
    decoded = OrderedDict()
    for key, value in iteritems(d):
        decoded[key] = _decode_string(value)
    return decoded
def set_dimensions(self, dimensions):
    """Call ``set_dimension`` for every ``(name, length)`` item."""
    for dim_name, length in iteritems(dimensions):
        self.set_dimension(dim_name, length)