def load_values(fileid, varid, vartype, start, count, out=None):
  # Map the netCDF types to a numpy type
  # NOTE: character type not supported
  from ctypes import c_long
  from pygeode.tools import point
  import numpy as np
  if out is None:
    out = np.empty(count, dtype=numpy_type[vartype])
  f = {1: lib.nc_get_vara_schar,   2: lib.nc_get_vara_text,
       3: lib.nc_get_vara_short,   4: lib.nc_get_vara_int,
       5: lib.nc_get_vara_float,   6: lib.nc_get_vara_double,
       7: lib.nc_get_vara_uchar,   8: lib.nc_get_vara_ushort,
       9: lib.nc_get_vara_uint,   10: lib.nc_get_vara_longlong,
      11: lib.nc_get_vara_ulonglong}
  A = c_long * len(start)
  _start = A(*start)
  _count = A(*count)
  ret = f[vartype](fileid, varid, _start, _count, point(out))
  if ret != 0: raise IOError(lib.nc_strerror(ret))
  return out
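# A hypothetical usage sketch (not part of the original module): reading a 10x20
# hyperslab of a double-precision variable from an already-opened file.  The
# fileid/varid handles are assumed to come from nc_open / nc_inq_varid elsewhere.
#
#   start = [0, 0]     # origin of the hyperslab along each dimension
#   count = [10, 20]   # number of elements to read along each dimension
#   data = load_values(fileid, varid, 6, start, count)   # type code 6 = NC_DOUBLE
#   # 'data' is a numpy array of shape (10, 20) with dtype numpy_type[6]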
def get_attributes(obj_id, natts):
  from ctypes import create_string_buffer, c_long, byref
  import numpy as np
  from pygeode.tools import point

  atts = {}
  for i in range(natts):
    name = create_string_buffer(256)
    type = c_long()
    count = c_long()
    ret = lib.SDattrinfo(obj_id, i, name, byref(type), byref(count))
    assert ret == 0
    # Can only handle strings (type=3 or 4?) for now.
    # Note: should find HDF4 files that actually have numerical attributes before doing this
    name = str(name.value.decode())
    count = count.value
    type = type.value
    if type in (3, 4):
      value = create_string_buffer(count)
      ret = lib.SDreadattr(obj_id, i, value)
      assert ret == 0
      value = str(value.value.decode())
    else:
      value = np.empty([count], dtype=numpy_type[type])
      ret = lib.SDreadattr(obj_id, i, point(value))
      assert ret == 0
      if len(value) == 1: value = value[0]
    atts[name] = value

  return atts
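# A hypothetical usage sketch (not part of the original module): collecting the
# attributes of one HDF4 scientific data set.  The sds_id handle and the attribute
# count are assumed to come from SDselect / SDgetinfo elsewhere in the reader.
#
#   atts = get_attributes(sds_id, natts)
#   units = atts.get('units')        # string attributes come back as Python str
#   fill = atts.get('_FillValue')    # one-element numeric attributes come back as scalars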
def put_attributes (fileid, varid, atts, version):  # {{{
  from numpy import asarray
#  from ctypes import c_long
  from pygeode.tools import point
  from warnings import warn

  for name, value in atts.items():
    # String?
    if isinstance(value, str):
      vtype = 2
      ret = put_att_f[vtype](fileid, varid, name.encode('ascii'), len(value), value.encode('ascii'))
      assert ret == 0, lib.nc_strerror(ret)
    else:
      oldvalue = value
      value = asarray(value)
      # Numpy likes to use int64's a lot - force the arrays back down to int32?
      if isinstance(oldvalue, int) and -2147483648 <= oldvalue <= 2147483647:
        value = asarray(value, dtype='int32')
      # Drop unsupported data types
      if value.dtype.name.startswith('string'):
        warn ("no support for writing attributes containing an array of strings", stacklevel=3)
        return
      if value.dtype.name not in nc_type[version]:
        warn ("skipping attribute %s = %s (unsupported type %s)"%(name,value,value.dtype.name), stacklevel=3)
        return
      # Scalar?
      if value.shape == ():
        value = value.reshape([1])
      vtype = nc_type[version][value.dtype.name]
      # Get the dtype again, but this time it should be compatible with the function we're writing to
      # (in case there is an implicit cast involved, i.e. int64's need to be cast to something else for netcdf)
      dtype = numpy_type[vtype]
      value = asarray(value, dtype=dtype)
      ret = put_att_f[vtype](fileid, varid, name.encode('ascii'), vtype, len(value), point(value))
      assert ret == 0, lib.nc_strerror(ret)
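# A hypothetical usage sketch (not part of the original module): put_attributes is
# called while the file is still in define mode (as save does below), once with
# varid = -1 for global attributes and once per variable.
#
#   put_attributes(fileid, -1, {'title': 'test data', 'history': 'created by pygeode'}, version)
#   put_attributes(fileid, varid, {'units': 'K', 'scale_factor': 0.5}, version)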
def load_values(sds_id, start, count, out):
  import numpy as np
  from ctypes import c_int
  from pygeode.tools import point
  A = c_int * len(start)
  _start = A(*start)
  _stride = A(*([1] * len(start)))
  _count = A(*count)
  ret = lib.SDreaddata(sds_id, _start, _stride, _count, point(out))
  assert ret == 0, 'HDF4 read error: SDreaddata returned code %d' % ret
  return out
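# A hypothetical usage sketch (not part of the original module): this HDF4 reader,
# unlike the netCDF one above, requires a pre-allocated output array whose dtype
# matches the data set.  The sds_id handle is assumed to come from SDselect.
#
#   import numpy as np
#   out = np.empty([10, 20], dtype='float32')
#   load_values(sds_id, [0, 0], [10, 20], out)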
def get_attributes(fileid, varid):  # {{{
  from ctypes import create_string_buffer, c_int, c_long, byref
  from pygeode.tools import point
  from numpy import empty

  natts = c_int()

  # Global attributes?
  if (varid < 0):
    ret = lib.nc_inq_natts(fileid, byref(natts))
    assert ret == 0, lib.nc_strerror(ret)
  # Variable attributes?
  else:
    ret = lib.nc_inq_varnatts(fileid, varid, byref(natts))
    assert ret == 0, lib.nc_strerror(ret)

  natts = natts.value

  atts = {}

  name = create_string_buffer(NC_MAX_NAME)
  vtype = c_int()
  size = c_long()

  # Loop over all attributes
  for n in range(natts):
    ret = lib.nc_inq_attname(fileid, varid, n, name)
    assert ret == 0, lib.nc_strerror(ret)
    ret = lib.nc_inq_att(fileid, varid, name, byref(vtype), byref(size))
    assert ret == 0, lib.nc_strerror(ret)
    # String?
    if vtype.value == 2:
      valstr = create_string_buffer(size.value)
      ret = get_att_f[vtype.value](fileid, varid, name, valstr)
      assert ret == 0, lib.nc_strerror(ret)
      value = str(valstr.value.decode())
    else:
      valnp = empty([size.value], numpy_type[vtype.value])
      ret = get_att_f[vtype.value](fileid, varid, name, point(valnp))
      assert ret == 0, lib.nc_strerror(ret)
      value = valnp
      if value.size == 1: value = value[0]

    atts[str(name.value.decode())] = value

  return atts
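# A hypothetical usage sketch (not part of the original module): pass varid = -1
# (NC_GLOBAL) to read file-level attributes, or a variable id from nc_inq_varid to
# read per-variable attributes.  The fileid handle is assumed to come from nc_open.
#
#   global_atts = get_attributes(fileid, -1)
#   var_atts = get_attributes(fileid, varid)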
def save(filename, in_dataset, version=3, pack=None, compress=False, cfmeta=True, unlimited=None):  # {{{
  from ctypes import c_int, c_long, byref
  from pygeode.view import View
  from pygeode.tools import combine_axes, point
  from pygeode.axis import Axis, DummyAxis
  import numpy as np
  from pygeode.progress import PBar, FakePBar
  from pygeode.formats import finalize_save
  from pygeode.dataset import asdataset

  assert isinstance(filename, str)

  in_dataset = asdataset(in_dataset)
  dataset = finalize_save(in_dataset, cfmeta, pack)

  # Version?
  if compress: version = 4
  assert version in (3, 4)

  fileid = c_int()

  vars = list(dataset.vars)
  # The output axes
  axes = combine_axes(v.axes for v in vars)

  # Include axes in the list of vars (for writing to netcdf).
  # Exclude axes which don't have any intrinsic values.
  vars = vars + [a for a in axes if not isinstance(a, DummyAxis)]
  #vars.extend(axes)

  # Variables (and axes) must all have unique names
  assert len(set([v.name for v in vars])) == len(vars), "vars must have unique names: %s" % [v.name for v in vars]

  if unlimited is not None:
    assert unlimited in [a.name for a in axes]

  # Functions for writing entire array
  allf = {1: lib.nc_put_var_schar,   2: lib.nc_put_var_text,
          3: lib.nc_put_var_short,   4: lib.nc_put_var_int,
          5: lib.nc_put_var_float,   6: lib.nc_put_var_double,
          7: lib.nc_put_var_uchar,   8: lib.nc_put_var_ushort,
          9: lib.nc_put_var_uint,   10: lib.nc_put_var_longlong,
         11: lib.nc_put_var_ulonglong}

  # Functions for writing chunks
  chunkf = {1: lib.nc_put_vara_schar,   2: lib.nc_put_vara_text,
            3: lib.nc_put_vara_short,   4: lib.nc_put_vara_int,
            5: lib.nc_put_vara_float,   6: lib.nc_put_vara_double,
            7: lib.nc_put_vara_uchar,   8: lib.nc_put_vara_ushort,
            9: lib.nc_put_vara_uint,   10: lib.nc_put_vara_longlong,
           11: lib.nc_put_vara_ulonglong}

  # Create the file
  if version == 3:
    ret = lib.nc_create(filename.encode('ascii'), 0, byref(fileid))
    if ret != 0: raise IOError(lib.nc_strerror(ret))
  elif version == 4:
    ret = lib.nc_create(filename.encode('ascii'), 0x1000, byref(fileid))  # 0x1000 = NC_NETCDF4
    if ret != 0: raise IOError(lib.nc_strerror(ret))
  else: raise Exception

  try:
    # Define the dimensions
    dimids = [None] * len(axes)
    for i, a in enumerate(axes):
      dimids[i] = c_int()
      if unlimited == a.name:
        ret = lib.nc_def_dim(fileid, a.name.encode('ascii'), c_long(0), byref(dimids[i]))
      else:
        ret = lib.nc_def_dim(fileid, a.name.encode('ascii'), c_long(len(a)), byref(dimids[i]))
      assert ret == 0, lib.nc_strerror(ret)

    # Define the variables (including axes)
    chunks = [None] * len(vars)
    varids = [None] * len(vars)
    for i, var in enumerate(vars):
      t = nc_type[version][var.dtype.name]
      # Generate the array of dimension ids for this var
      d = [dimids[list(axes).index(a)] for a in var.axes]
      # Make it C-compatible
      d = (c_int * var.naxes)(*d)
      varids[i] = c_int()
      ret = lib.nc_def_var(fileid, var.name.encode('ascii'), t, var.naxes, d, byref(varids[i]))
      assert ret == 0, lib.nc_strerror(ret)
      # Compress the data? (only works for netcdf4 or (higher?))
      if compress:
        ret = lib.nc_def_var_deflate(fileid, varids[i], 1, 1, 2)
        assert ret == 0, lib.nc_strerror(ret)

    # Write the attributes

    # global attributes
    put_attributes(fileid, -1, dataset.atts, version)

    # variable attributes
    for i, var in enumerate(vars):
      # modify axes to be netcdf friendly (CF-compliant, etc.)
      put_attributes(fileid, varids[i], var.atts, version)

    # Don't pre-fill the file
    oldmode = c_int()
    ret = lib.nc_set_fill(fileid, 256, byref(oldmode))
    assert ret == 0, "Can't set fill mode: %s (error %d)" % (lib.nc_strerror(ret), ret)
    # Finished defining the variables, about to start writing the values
    ret = lib.nc_enddef(fileid)
    assert ret == 0, "Error leaving define mode: %s (error %d)" % (lib.nc_strerror(ret), ret)

    # Relative progress of each variable
    sizes = [v.size for v in vars]
    prog = np.cumsum([0.] + sizes) / np.sum(sizes) * 100

#    print "Saving '%s':"%filename
    pbar = PBar(message="Saving '%s':" % filename)
#    pbar = FakePBar()

    # Write the data
    for i, var in enumerate(vars):
      t = nc_type[version][var.dtype.name]
      dtype = numpy_type[t]

#      print 'writing', var.name

      # number of actual variables (non-axes) for determining our progress
      N = len([v for v in vars if not isinstance(v, Axis)])
      varpbar = pbar.subset(prog[i], prog[i + 1])

      views = list(View(var.axes).loop_mem())
      for j, v in enumerate(views):
        vpbar = varpbar.part(j, len(views))
#        print '???', repr(str(v))

        # Should always be slices (since we're looping over whole thing contiguously?)
        for sl in v.slices: assert isinstance(sl, slice)
        for sl in v.slices: assert sl.step in (1, None)

        start = [sl.start for sl in v.slices]
        count = [sl.stop - sl.start for sl in v.slices]

        start = (c_long * var.naxes)(*start)
        count = (c_long * var.naxes)(*count)

        if isinstance(var, Axis):
          assert len(start) == len(count) == 1
          data = var.values
          data = data[start[0]:start[0] + count[0]]  # the above gives us the *whole* axis,
                                                     # but under extreme conditions we may be looping over smaller pieces
          vpbar.update(100)
        else:
          data = v.get(var, pbar=vpbar)

        # Ensure the data is stored contiguously in memory
        data = np.ascontiguousarray(data, dtype=dtype)
        ret = chunkf[t](fileid, varids[i], start, count, point(data))
        assert ret == 0, "Error writing var '%s' to netcdf: %s (error %d)" % (var.name, lib.nc_strerror(ret), ret)

  finally:
    # Finished
    lib.nc_close(fileid)
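# A hypothetical usage sketch (not part of the original module): a minimal save of a
# single pygeode Var; the axis and variable names here are made up for illustration.
#
#   import numpy as np
#   from pygeode.axis import Lat
#   from pygeode.var import Var
#   lat = Lat(np.arange(-90, 91, 30))
#   temp = Var([lat], values=np.linspace(250., 300., len(lat)), name='temp')
#   save('example.nc', temp)                  # classic netCDF-3 file
#   save('example4.nc', temp, compress=True)  # compress=True forces version 4 (netCDF-4 + deflate)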