def _roperation(self, func, other):
    """ Apply a binary operation with the operands reflected

    `other` is handed to `_operation.operation` as the left-hand operand and
    `self` as the right-hand one (presumably to back reflected operators
    such as ``other - self``).

    Parameters
    ----------
    func : callable
        binary function forwarded to `_operation.operation`
    other : object
        left-hand operand

    Returns
    -------
    whatever `_operation.operation` returns
    """
    # alignment behaviour comes from the global options, the result type
    # from this instance's constructor
    settings = {
        'broadcast': get_option('op.broadcast'),
        'reindex': get_option('op.reindex'),
        'constructor': self._constructor,
    }
    return _operation.operation(func, other, self, **settings)
def _setitem(self, indices, values, axis=0, indexing=None, tol=None, broadcast=None, cast=False, inplace=True):
    """ Assign `values` to the elements selected by `indices`

    Parameters
    ----------
    indices : index specification
        either a full-shape boolean index (as detected by
        `_is_boolean_index_nd`) or anything `_get_indices` accepts
    values : values to write
    axis, indexing, tol : forwarded to `_get_indices`
    broadcast : bool, optional
        if None, falls back on `self._broadcast`, then on the
        'indexing.broadcast' option
    cast : forwarded to the `_setvalues_*` method that does the writing
    inplace : bool, optional
        if False, the assignment is done on `self.copy()`, which is returned

    Returns
    -------
    None if `inplace` is True, otherwise the modified copy

    See Also
    --------
    DimArray.read, DimArrayOnDisk.write
    """
    if broadcast is None:
        broadcast = self._broadcast if self._broadcast is not None else get_option('indexing.broadcast')

    # work on a copy when the caller asked for a new object
    target = self if inplace else self.copy()

    # special-case: full-shape boolean indexing (will fail with netCDF4)
    if target._is_boolean_index_nd(indices):
        target._setvalues_bool(indices, values, cast=cast)
    else:
        idx = target._get_indices(indices, tol=tol, indexing=indexing, axis=axis)
        flavor = 'broadcast' if broadcast else 'ortho'
        getattr(target, '_setvalues_' + flavor)(idx, values, cast=cast)

    if not inplace:
        return target
def repr_dimarray(self, metadata=False, lazy=False):
    """ Build the multi-line text representation of an array-like object

    Parameters
    ----------
    self : object exposing `name`, `size`, `ndim`, `axes`, `attrs`, `values`
    metadata : bool, optional
        if True, a non-empty `attrs` is listed under an "attributes:" line,
        and the flag is forwarded to `repr_axes`
    lazy : bool, optional
        if True, the header shows the class name, `name` and `size`, and
        the data values are not shown at all

    Returns
    -------
    str : the lines joined with newlines
    """
    cls_name = self.__class__.__name__
    if lazy:
        header = cls_name + ": " + repr(self.name) + " (%i" % self.size + ")"
    else:
        header = cls_name.lower() + ": " + stats_dimarray(self)
    parts = [header]

    # axes (nothing to show for a 0-d object)
    if self.ndim > 0:
        parts.append(repr_axes(self.axes, metadata=metadata))

    # metadata
    if metadata and len(self.attrs) > 0:
        parts.append("attributes:")
        parts.append(repr_attrs(self.attrs))

    # the data itself: skipped when lazy, abbreviated above 'display.max'
    if lazy:
        body = ""
    elif self.size > get_option("display.max"):
        body = "array(...)"
    else:
        body = repr(self.values)
    if body:
        parts.append(body)

    return "\n".join(parts)
def _maybe_open_file(f, mode="r", clobber=None, verbose=False, format=None):
    """ Open a netCDF4 file, unless `f` already is an open dataset

    Parameters
    ----------
    f : file name (str) or netCDF file handle
    mode : `str`
        read or write access
        - 'r' : read
        - 'w' : write, overwrite file if present (clobber=True)
        - 'w-': create new file, but raise Exception if file is present (clobber=False)
        - 'a' : append, raise Exception if file is not present
        - 'a+': append if file is present, otherwise create
    clobber : bool, optional
        forwarded to nc.Dataset. If None it is derived from `mode`:
        False for 'w-' and 'a+', True otherwise.
    verbose : bool, optional
        not used by the code visible in this definition
    format : passed to nc.Dataset, only relevant when mode = 'w', 'w-', 'a+'
        'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_CLASSIC', 'NETCDF3_64BIT'
        Defaults to the 'io.nc.format' option.

    Returns
    -------
    f : netCDF file handle
    close : `bool`, `True` if input f indicated file name

    Raises
    ------
    IOError : if nc.Dataset fails to open the file
    """
    format = format or get_option("io.nc.format")

    if mode == "w-":
        mode = "w"
        if clobber is None:
            clobber = False

    # mode 'a+' appends if file exists, otherwise create new file
    elif mode == "a+" and not isinstance(f, nc.Dataset):
        mode = "a" if os.path.exists(f) else "w"
        if clobber is None:
            clobber = False

    else:
        if clobber is None:
            clobber = True

    # a file name was given: the file is opened here
    close = not isinstance(f, nc.Dataset)

    if close:
        fname = f

        # make sure the file does not exist if mode == "w"
        if os.path.exists(fname) and clobber and mode == "w":
            os.remove(fname)

        try:
            f = nc.Dataset(fname, mode, clobber=clobber, format=format)
        except UserWarning as msg:
            # NOTE(review): on this path `f` is still the file name, not a
            # handle -- confirm that callers can cope with that
            print(msg)
        except Exception as msg:
            # the underlying library raises a hard-to-read RuntimeError;
            # IOError is easier to handle for callers
            raise IOError("{} => failed to opend {} in mode {}".format(msg, fname, mode))

    # NOTE(review): the original visible body stopped after the `raise`
    # without returning anything; the documented (f, close) pair is returned
    # here -- confirm against the callers
    return f, close
def take_axis(self, indices, axis=0, indexing=None, mode='raise'):
    """ Analogous to DimArray.take_axis

    Parameters
    ----------
    indices : iterable
        positions, or labels if label-based indexing is in effect
    axis : axis along which to take, forwarded to `reduce_axis`
    indexing : str, optional
        if not provided, falls back on `self._indexing` (when present),
        then on the 'indexing.by' option
    mode : str, optional
        forwarded as is to the axis' `loc` method for label look-up;
        for `np.take`, anything other than 'raise', 'clip' or 'wrap'
        is replaced by 'raise'

    Returns
    -------
    result of `self.reduce_axis(np.take, ...)`

    Raises
    ------
    TypeError : if `indices` is not iterable
    """
    if not np.iterable(indices):
        raise TypeError("indices must be iterable")

    # resolve the indexing scheme: argument > instance attribute > option
    how = indexing
    if not how:
        how = getattr(self, "_indexing", None)
    if not how:
        how = get_option("indexing.by")

    # labels are first converted to positions
    if how == "label":
        indices = self.axes[axis].loc(indices, mode=mode)

    # np.take only knows these three modes
    numpy_mode = mode if mode in ('raise', 'clip', 'wrap') else 'raise'

    return self.reduce_axis(np.take, indices=indices, axis=axis, mode=numpy_mode, keepattrs=True, keepdims=True)
def _binary_op(self, func, other):
    """ generalize DimArray operation to a Dataset, for each key

    `other` must be a Dataset or a scalar. With a Dataset, only the keys
    present in both operands end up in the result (intersection of the two
    datasets); with a scalar, every variable of `self` is combined with it.

    Just for testing:
    >>> ds = Dataset(b=DimArray([[0.,1],[1,2]]))
    >>> -ds
    Dataset of 1 variable
    0 / x0 (2): 0 to 1
    1 / x1 (2): 0 to 1
    b: ('x0', 'x1')
    >>> -ds["b"]
    dimarray: 4 non-null elements (0 null)
    0 / x0 (2): 0 to 1
    1 / x1 (2): 0 to 1
    array([[-0., -1.],
           [-1., -2.]])
    >>> np.all(ds == ds)
    True
    >>> assert isinstance(-ds, Dataset)
    >>> assert isinstance(ds/0.5, Dataset)
    >>> assert isinstance(ds*0, Dataset)
    >>> (-ds -ds + ds/0.5 + ds*0+1)['b']
    dimarray: 4 non-null elements (0 null)
    0 / x0 (2): 0 to 1
    1 / x1 (2): 0 to 1
    array([[1., 1.],
           [1., 1.]])
    >>> ds += 1
    >>> ds['b']
    dimarray: 4 non-null elements (0 null)
    0 / x0 (2): 0 to 1
    1 / x1 (2): 0 to 1
    array([[1., 2.],
           [2., 3.]])
    """
    assert isinstance(other, Dataset) or isscalar(other), "can only combine Datasets objects (func={})".format(func.__name__)

    # align all axes first
    # NOTE(review): the return value of reindex_like is discarded; if that
    # method returns a new object rather than working in place, this
    # alignment has no effect -- confirm
    if get_option("op.reindex") and hasattr(other, 'axes') and other.axes != self.axes:
        other.reindex_like(self)

    # now proceed to operation, variable by variable
    res = self.__class__()
    for key in self.keys():
        if not hasattr(other, 'keys'):
            # scalar operand
            res[key] = self[key]._binary_op(func, other)
            continue
        # dataset operand: only variables present on both sides
        for other_key in other.keys():
            if key == other_key:
                res[key] = self[key]._binary_op(func, other[other_key])

    return res
def _getitem(self, indices=None, axis=0, indexing=None, tol=None, broadcast=None, keepdims=False,
             broadcast_arrays=None,  # back-compatibility for broadcast
             ):
    """ Retrieve the elements selected by `indices`

    Parameters
    ----------
    indices : index specification, optional
        None is treated as an empty tuple. A full-shape boolean index
        (as detected by `_is_boolean_index_nd`) is delegated to
        `self.compress`.
    axis, indexing, tol, keepdims : forwarded to `_get_indices`
    broadcast : bool, optional
        if None, falls back on `self._broadcast`, then on the
        'indexing.broadcast' option
    broadcast_arrays : deprecated alias for `broadcast`

    Returns
    -------
    the values themselves if they are a scalar, otherwise a new object
    built with `self._constructor`, carrying a copy of `self.attrs`

    Raises
    ------
    TypeError : for boolean indexing on an object without `compress`
    """
    if indices is None:
        indices = ()

    if broadcast_arrays is not None:
        warnings.warn(FutureWarning("broadcast_arrays is deprecated, use broadcast instead"))
        broadcast = broadcast_arrays

    if broadcast is None:
        broadcast = self._broadcast if self._broadcast is not None else get_option('indexing.broadcast')

    # special-case: full-shape boolean indexing (will fail with netCDF4)
    if self._is_boolean_index_nd(indices):
        if not hasattr(self, 'compress'):
            raise TypeError("{} does not support boolean indexing".format(self.__class__.__name__))
        return self.compress(indices)

    idx = self._get_indices(indices, axis=axis, indexing=indexing, tol=tol, keepdims=keepdims)

    # broadcast arrays a la numpy, or index each dimension independently
    flavor = 'broadcast' if broadcast else 'ortho'
    axes = getattr(self, '_getaxes_' + flavor)(idx)
    values = getattr(self, '_getvalues_' + flavor)(idx)

    if np.isscalar(values):
        return values

    result = self._constructor(values, axes)  # initialize DimArray
    result.attrs.update(self.attrs)  # add attribute
    return result
def __repr__(self):
    """ pretty printing

    The representation has three parts, one per line (group):
    - a header with the number of non-null (non-NaN) and null elements
    - the representation of the axes
    - the values, abbreviated as "array(...)" unless the size is smaller
      than the 'display.max' option

    Returns
    -------
    str
    """
    try:
        # count_nonzero handles 0-d and n-d values alike and always yields
        # a count; the former 0-d branch produced a boolean, printed as
        # "True non-null elements"
        nonnull = int(np.count_nonzero(~np.isnan(self.values)))
    except TypeError:
        # isnan is not defined for this dtype (e.g. object): count everything
        nonnull = self.size

    lines = [
        "dimarray: {} non-null elements ({} null)".format(nonnull, self.size - nonnull),
        repr(self.axes),
    ]

    if self.size < get_option('display.max'):
        lines.append(repr(self.values))
    else:
        lines.append("array(...)")

    return "\n".join(lines)
def __repr__(self):
    """ pretty printing

    Layout of the returned text:
    - header: number of non-null (non-NaN) and null elements
    - representation of the axes
    - the values, or "array(...)" when the size is not smaller than the
      'display.max' option

    Returns
    -------
    str
    """
    try:
        # a single count for any dimensionality, 0-d included; the former
        # 0-d branch yielded a boolean, so the header read
        # "True non-null elements"
        nonnull = int(np.count_nonzero(~np.isnan(self.values)))
    except TypeError:
        # e.g. object dtype, for which isnan is undefined
        nonnull = self.size

    header = "dimarray: {} non-null elements ({} null)".format(nonnull, self.size - nonnull)
    lines = [header, repr(self.axes)]

    if self.size < get_option('display.max'):
        lines.append(repr(self.values))
    else:
        lines.append("array(...)")

    return "\n".join(lines)
from dimarray.core import DimArray, Axis, Axes from dimarray.config import get_option from dimarray.core.bases import AbstractDimArray, AbstractDataset, AbstractAxis, GetSetDelAttrMixin, AbstractAxes # from dimarray.core.metadata import _repr_metadata from dimarray.prettyprinting import repr_axis, repr_axes, repr_dimarray, repr_dataset, repr_attrs from .conventions import encode_cf_datetime, decode_cf_datetime __all__ = ["read_nc", "summary_nc", "write_nc"] # # Global variables # FORMAT = get_option("io.nc.format") # for the doc # # Helper functions # def maybe_encode_values(values, format=None): """ strings are given "object" type in Axis object ==> assume all objects are actually strings NOTE: this will fail for other object-typed axes such as tuples """ # if dtype is np.dtype('O'): values = np.asarray(values) dtype = values.dtype cf_attrs = {} if dtype.kind in ("S", "O"):
def reindex_axis(self, values, axis=0, method="exact", repna=True, fill_value=np.nan, tol=TOLERANCE, use_pandas=None):
    """ reindex an array along an axis

    Input:
        - values : array-like or Axis: new axis values
            If an Axis is given, its name also replaces `axis`.
        - axis : axis number or name
        - method : "exact" (default), "nearest", "interp"
        - repna: if False, raise error when an axis value is not present
                 otherwise just replace with NaN. Default is True
        - fill_value: value to use instead of missing data
        - tol: re-index with a particular tolerance (can be longer)
            NOTE(review): in this body `tol` only decides whether pandas
            may be used; it is not forwarded to take_na / interp -- confirm
        - use_pandas, optional: bool : if True, convert to pandas for
            re-indexing. If None (the default), the 'optim.use_pandas'
            option is used. In any case it is switched off if method is not
            "exact", if `tol` or the axis' own `tol` or `modulo` is set, if
            the array has more than 4 dimensions or, of course, if pandas
            is not installed.

    Output:
        - DimArray (`self` itself if the axis is unchanged, or if the
          first value of the current axis is None)

    Raises:
        - ValueError for an unknown `method`

    Examples:
    ---------

    Basic reindexing: fill missing values with NaN
    >>> a = da.DimArray([1,2,3],('x0', [1,2,3]))
    >>> b = da.DimArray([3,4],('x0',[1,3]))
    >>> b.reindex_axis([1,2,3])
    dimarray: 2 non-null elements (1 null)
    dimensions: 'x0'
    0 / x0 (3): 1 to 3
    array([ 3., nan, 4.])

    Or replace with anything else, like -9999
    >>> b.reindex_axis([1,2,3], fill_value=-9999)
    dimarray: 3 non-null elements (0 null)
    dimensions: 'x0'
    0 / x0 (3): 1 to 3
    array([ 3, -9999, 4])

    "nearest" mode
    >>> b.reindex_axis([0,1,2,3], method='nearest') # out-of-bound to NaN
    dimarray: 3 non-null elements (1 null)
    dimensions: 'x0'
    0 / x0 (4): 0 to 3
    array([ nan, 3., 3., 4.])

    "interp" mode
    >>> b.reindex_axis([0,1,2,3], method='interp') # out-of-bound to NaN
    dimarray: 3 non-null elements (1 null)
    dimensions: 'x0'
    0 / x0 (4): 0 to 3
    array([ nan, 3. , 3.5, 4. ])
    """
    if isinstance(values, Axis):
        newaxis = values
        values = newaxis.values
        axis = newaxis.name

    axis_id = self.axes.get_idx(axis)
    ax = self.axes[axis_id]  # Axis object
    axis_nm = ax.name

    # do nothing if axis is same or only None element.
    # array_equal checks the shapes first: an elementwise `==` would
    # broadcast (a single value could "match" a whole axis) and fails on
    # current numpy for axes of different lengths
    if ax.values[0] is None or np.array_equal(values, ax.values):
        return self

    # check whether pandas can be used for re-indexing
    if use_pandas is None:
        use_pandas = get_option("optim.use_pandas")

    # ...any special option activated?
    if (method != "exact" or tol is not None
            or ax.tol is not None or ax.modulo is not None
            or self.ndim > 4):  # pandas defined up to 4-D
        use_pandas = False

    # ...is pandas installed?
    try:
        import pandas  # noqa: F401 (availability check only)
    except ImportError:
        use_pandas = False

    # re-index using pandas
    if use_pandas:
        pandasobj = self.to_pandas()
        # NOTE(review): reindex_axis no longer exists in recent pandas
        # releases -- confirm the supported pandas versions
        newpandas = pandasobj.reindex_axis(values, axis=axis_id, fill_value=fill_value)
        newobj = self.from_pandas(newpandas)  # use class method from_pandas
        newobj._metadata = self._metadata  # add metadata back
        newobj.axes[axis_id].name = axis_nm  # give back original name

    # indices along which to sample
    elif method == "exact":
        newobj = take_na(self, values, axis=axis, repna=repna, fill_value=fill_value)

    elif method in ("nearest", "interp"):
        # NOTE(review): implicit relative import, only resolved as a
        # sibling module under Python 2
        from interpolation import interp
        newobj = interp(self, values, axis=axis, method=method, repna=repna)

    else:
        raise ValueError("invalid reindex_axis method: " + repr(method))

    return newobj
def _operation(self, func, other):
    """ make an operation: this include axis and dimensions alignment

    `func` is applied with `self` as the left-hand operand and `other` as
    the right-hand one. Broadcasting and re-indexing follow the
    'op.broadcast' and 'op.reindex' options.

    Just for testing:
    >>> b = DimArray([[0.,1],[1,2]])
    >>> b
    ... # doctest: +SKIP
    array([[ 0., 1.],
           [ 1., 2.]])
    >>> np.all(b == b)
    True
    >>> np.all(b+2 == b + np.ones(b.shape)*2)
    True
    >>> np.all(b+b == b*2)
    True
    >>> np.all(b*b == b**2)
    True
    >>> np.all((b - b.values) == b - b)
    True
    >>> -b
    dimarray: 4 non-null elements (0 null)
    dimensions: 'x0', 'x1'
    0 / x0 (2): 0 to 1
    1 / x1 (2): 0 to 1
    array([[-0., -1.],
           [-1., -2.]])
    >>> np.all(-b == 0. - b)
    True

    True divide by default
    >>> a = DimArray([1,2,3])
    >>> a/2
    dimarray: 3 non-null elements (0 null)
    dimensions: 'x0'
    0 / x0 (3): 0 to 2
    array([ 0.5, 1. , 1.5])
    >>> a//2
    dimarray: 3 non-null elements (0 null)
    dimensions: 'x0'
    0 / x0 (3): 0 to 2
    array([0, 1, 1])

    Test group/corps structure (result of operation remains DimArray)
    >>> a = DimArray([[1.,2,3],[4,5,6]])
    >>> isinstance(a + 2., DimArray)
    True
    >>> isinstance(2. + a, DimArray)
    True
    >>> isinstance(2 * a, DimArray)
    True
    >>> isinstance(a * 2, DimArray)
    True
    >>> isinstance(2 / a, DimArray)
    True
    >>> isinstance(a / 2, DimArray)
    True
    >>> isinstance(2 - a, DimArray)
    True
    >>> isinstance(a - 2, DimArray)
    True
    >>> s = 0.
    >>> for i in range(5):
    ...     s = s + a
    >>> isinstance(a, DimArray)
    True
    >>> np.all(s == 5*a)
    True
    """
    # alignment options are global, the result type is the instance's own
    options = dict(
        broadcast=get_option('op.broadcast'),
        reindex=get_option('op.reindex'),
        constructor=self._constructor,
    )
    return _operation.operation(func, self, other, **options)
def _get_indices(self, indices, axis=0, indexing=None, tol=None, keepdims=False):
    """ Return an n-D indexer

    Parameters
    ----------
    indices : index specification
        None (everything), a tuple, a dict {dimension name or position:
        index}, an Axes object, or -- when `axis` is neither 0 nor None --
        the index along that single axis. A dict passed by the caller is
        left untouched.
    axis : dimension name or position, optional
        see `indices`
    indexing : str, optional
        if not provided, falls back on `self._indexing` (when present),
        then on the 'indexing.by' option. Anything other than 'position'
        triggers a look-up of the values on the axes.
    tol : optional
        forwarded to the axis' `loc` method; defaults to `self._tol` when
        that attribute exists
    keepdims : bool, optional
        if True, scalar indices are wrapped in a list so that the
        dimension is not collapsed

    Returns
    -------
    indexer : tuple of numpy-compatible indices, of length equal to the
        number of dimensions.

    Raises
    ------
    ValueError : if a dimension name used as key is not found
    """
    try:
        string_types = basestring  # noqa: F821 (Python 2)
    except NameError:
        string_types = str  # Python 3

    indexing = indexing or getattr(self, '_indexing', None) or get_option('indexing.by')

    dims = self.dims

    if indices is None:
        indices = ()

    if tol is None:
        tol = getattr(self, '_tol', None)

    #
    # Convert indices to tuple, from a variety of input formats
    #
    # special case: numpy like (idx, axis)
    if axis not in (0, None):
        indices = {axis: indices}

    # special case: Axes is provided as index
    elif isinstance(indices, AbstractAxes):
        indices = {ax.name: ax.values for ax in indices}

    # should always be a tuple
    if isinstance(indices, dict):
        # replace int dimensions with str dimensions; a new dict is built
        # because changing the keys of a dict while iterating over it is
        # not reliable, and because the caller's dict must stay intact
        by_name = {}
        for k, ix in indices.items():
            if not isinstance(k, string_types):
                k = dims[k]
            elif k not in dims:
                raise ValueError("Dimension {} not found. Existing dimensions: {}".format(k, dims))
            by_name[k] = ix

        indices = tuple(by_name[d] if d in by_name else slice(None) for d in dims)

    # expand to N-D tuple, and expands ellipsis
    indices = expanded_indexer(indices, self.ndim)

    # load each dimension as necessary
    indexer = []
    for i, ix in enumerate(indices):
        dim = dims[i]

        if not np.isscalar(ix) and not isinstance(ix, slice):
            ix = np.asarray(ix)

        # boolean indices are fine
        if isinstance(ix, np.ndarray) and ix.dtype.kind == 'b':
            pass

        # in case of label-based indexing, need to read the whole dimension
        # and look for the appropriate values
        elif indexing != 'position' and not (type(ix) is slice and ix == slice(None)):
            # find the index corresponding to the required axis value
            ix = self.axes[dim].loc(ix, tol=tol)

        # numpy rule: a singleton list does not collapse the axis
        if keepdims and np.isscalar(ix):
            ix = [ix]

        indexer.append(ix)

    return tuple(indexer)
def __init__(self, values=None, axes=None, dims=None, labels=None, copy=False, dtype=None,
             _indexing=None, _indexing_broadcast=None, **kwargs):
    """ Initialization. See help on DimArray.

    Parameters
    ----------
    values : array-like, optional
        Masked arrays have their masked elements replaced by NaN (after
        conversion to float if needed). If None, an array is allocated
        from the axes and filled with NaN when `dtype` is float or None.
        If `values` has an `axes` attribute and `axes` is None, those
        axes are used.
    axes, dims, labels : forwarded to `Axes._init`
    copy : bool, optional
        if True, `values` is always copied; otherwise only if required
    dtype : optional, forwarded to numpy
    _indexing, _indexing_broadcast : optional
        default to the 'indexing.by' and 'indexing.broadcast' options
    **kwargs : stored as is in `self._metadata`

    Raises
    ------
    ValueError : if the shape of the values does not match the axes
    """
    # check if attached to values (e.g. DimArray object)
    if hasattr(values, "axes") and axes is None:
        axes = values.axes

    # default options
    if _indexing is None:
        _indexing = get_option('indexing.by')
    if _indexing_broadcast is None:
        _indexing_broadcast = get_option('indexing.broadcast')

    #
    # array values
    #
    # if masked array, replace mask by NaN
    if isinstance(values, np.ma.MaskedArray):
        try:
            values = values.filled(np.nan)  # fill mask with nans
        except Exception:
            # NaN cannot be stored in this dtype: first convert to float
            values = np.ma.asarray(values, dtype=float).filled(np.nan)

    if values is not None:
        # np.array(..., copy=False) means "never copy" on numpy >= 2 and
        # raises whenever a copy is unavoidable (e.g. for a list);
        # asarray keeps the "copy only if needed" behaviour everywhere
        if copy:
            values = np.array(values, dtype=dtype)
        else:
            values = np.asarray(values, dtype=dtype)

    #
    # Initialize the axes
    #
    if axes is None and labels is None:
        assert values is not None, "values= or/and axes=, labels= required to determine dimensions"

    axes = Axes._init(axes, dims=dims, labels=labels, shape=values.shape if values is not None else None)
    assert type(axes) is Axes

    # if values not provided, create empty data, filled with NaNs if dtype is float
    if values is None:
        values = np.empty([ax.size for ax in axes], dtype=dtype)
        if dtype in (float, None, np.dtype(float)):
            values.fill(np.nan)
        else:
            warnings.warn("no nan representation for {}, array left empty".format(repr(dtype)))

    #
    # store all fields
    #
    self.values = values
    self.axes = axes

    # options
    self._indexing = _indexing
    self._indexing_broadcast = _indexing_broadcast

    #
    # metadata
    #
    self._metadata = kwargs

    # Check consistency between axes and values
    inferred = tuple([ax.size for ax in self.axes])
    if inferred != self.values.shape:
        msg = """\
shape inferred from axes: {}
shape inferred from data: {}
mismatch between values and axes""".format(inferred, self.values.shape)
        raise ValueError(msg)

    # If a general ordering relationship of the class is assumed,
    # always sort the class
    if self._order is not None:
        # list comprehensions instead of filter(): on Python 3 filter
        # returns an iterator, which cannot be concatenated with "+"
        present = [dim for dim in self._order if dim in self.dims]  # prescribed
        if self.dims != tuple(present):
            missing = [dim for dim in self.dims if dim not in self._order]  # not prescribed
            order = tuple(missing + present)  # prepend dimensions not found in ordering relationship
            obj = self.transpose(order)
            self.values = obj.values
            self.axes = obj.axes
from dimarray.dataset import Dataset, concatenate_ds, stack_ds from dimarray.core import DimArray, Axis, Axes from dimarray.config import get_option from dimarray.core.bases import AbstractDimArray, AbstractDataset, AbstractAxis, GetSetDelAttrMixin, AbstractAxes # from dimarray.core.metadata import _repr_metadata from dimarray.prettyprinting import repr_axis, repr_axes, repr_dimarray, repr_dataset, repr_attrs from .conventions import encode_cf_datetime, decode_cf_datetime __all__ = ['read_nc','summary_nc', 'write_nc'] # # Global variables # FORMAT = get_option('io.nc.format') # for the doc # # Helper functions # def maybe_encode_values(values, format=None): """ strings are given "object" type in Axis object ==> assume all objects are actually strings NOTE: this will fail for other object-typed axes such as tuples """ # if dtype is np.dtype('O'): values = np.asarray(values) dtype = values.dtype cf_attrs = {} if dtype.kind in ('S','O'):
def __init__(self, values=None, axes=None, dims=None, labels=None, copy=False, dtype=None,
             _indexing=None, _indexing_broadcast=None, **kwargs):
    """ Initialization. See help on DimArray.

    Parameters
    ----------
    values : array-like, optional
        Masked elements of a masked array are replaced by NaN (the data
        is converted to float first if it cannot hold NaN). If None, the
        array is allocated from the axes, and filled with NaN when `dtype`
        is float or None. If `values` has an `axes` attribute and `axes`
        is None, those axes are used.
    axes, dims, labels : forwarded to `Axes._init`
    copy : bool, optional
        if True, `values` is always copied; otherwise only if required
    dtype : optional, forwarded to numpy
    _indexing, _indexing_broadcast : optional
        default to the 'indexing.by' and 'indexing.broadcast' options
    **kwargs : stored as is in `self._metadata`

    Raises
    ------
    ValueError : if the shape of the values does not match the axes
    """
    # check if attached to values (e.g. DimArray object)
    if hasattr(values, "axes") and axes is None:
        axes = values.axes

    # default options
    if _indexing is None:
        _indexing = get_option('indexing.by')
    if _indexing_broadcast is None:
        _indexing_broadcast = get_option('indexing.broadcast')

    #
    # array values
    #
    # if masked array, replace mask by NaN
    if isinstance(values, np.ma.MaskedArray):
        try:
            values = values.filled(np.nan)  # fill mask with nans
        except Exception:
            # the dtype cannot represent NaN: first convert to float
            values = np.ma.asarray(values, dtype=float).filled(np.nan)

    if values is not None:
        # on numpy >= 2, np.array(..., copy=False) raises whenever a copy
        # cannot be avoided (e.g. list input); asarray copies only if needed
        # on every numpy version
        if copy:
            values = np.array(values, dtype=dtype)
        else:
            values = np.asarray(values, dtype=dtype)

    #
    # Initialize the axes
    #
    if axes is None and labels is None:
        assert values is not None, "values= or/and axes=, labels= required to determine dimensions"

    axes = Axes._init(axes, dims=dims, labels=labels, shape=values.shape if values is not None else None)
    assert type(axes) is Axes

    # if values not provided, create empty data, filled with NaNs if dtype is float
    if values is None:
        values = np.empty([ax.size for ax in axes], dtype=dtype)
        if dtype in (float, None, np.dtype(float)):
            values.fill(np.nan)
        else:
            warnings.warn("no nan representation for {}, array left empty".format(repr(dtype)))

    #
    # store all fields
    #
    self.values = values
    self.axes = axes

    # options
    self._indexing = _indexing
    self._indexing_broadcast = _indexing_broadcast

    #
    # metadata
    #
    self._metadata = kwargs

    # Check consistency between axes and values
    inferred = tuple([ax.size for ax in self.axes])
    if inferred != self.values.shape:
        msg = """\
shape inferred from axes: {}
shape inferred from data: {}
mismatch between values and axes""".format(inferred, self.values.shape)
        raise ValueError(msg)

    # If a general ordering relationship of the class is assumed,
    # always sort the class
    if self._order is not None:
        # filter() returns an iterator on Python 3 and cannot be
        # concatenated with "+": build the sequences explicitly
        present = [dim for dim in self._order if dim in self.dims]  # prescribed
        if self.dims != tuple(present):
            missing = [dim for dim in self.dims if dim not in self._order]  # not prescribed
            order = tuple(missing + present)  # prepend dimensions not found in ordering relationship
            obj = self.transpose(order)
            self.values = obj.values
            self.axes = obj.axes
def __init__(self, ds, name, _indexing=None):
    """ Store the owning dataset handle and the name to look up in it

    Parameters
    ----------
    ds : stored as `self._ds`
    name : stored as `self._name`
    _indexing : optional
        stored as `self._indexing`; any false value (None included) is
        replaced by the 'indexing.by' option
    """
    how = _indexing
    if not how:
        how = get_option('indexing.by')
    self._indexing = how
    self._ds = ds
    self._name = name
def reindex_axis(self, values, axis=0, method='exact', repna=True, fill_value=np.nan, tol=TOLERANCE, use_pandas=None):
    """ reindex an array along an axis

    Input:
        - values : array-like or Axis: new axis values
            If an Axis is given, its name also replaces `axis`.
        - axis : axis number or name
        - method : "exact" (default), "nearest", "interp"
        - repna: if False, raise error when an axis value is not present
                 otherwise just replace with NaN. Default is True
        - fill_value: value to use instead of missing data
        - tol: re-index with a particular tolerance (can be longer)
            NOTE(review): in this body `tol` only decides whether pandas
            may be used; it is not forwarded to take_na / interp -- confirm
        - use_pandas, optional: bool : if True, convert to pandas for
            re-indexing. If None (the default), the 'optim.use_pandas'
            option is used. In any case it is switched off if method is not
            "exact", if `tol` or the axis' own `tol` or `modulo` is set, if
            the array has more than 4 dimensions or, of course, if pandas
            is not installed.

    Output:
        - DimArray (`self` itself if the axis is unchanged, or if the
          first value of the current axis is None)

    Raises:
        - ValueError for an unknown `method`

    Examples:
    ---------

    Basic reindexing: fill missing values with NaN
    >>> a = da.DimArray([1,2,3],('x0', [1,2,3]))
    >>> b = da.DimArray([3,4],('x0',[1,3]))
    >>> b.reindex_axis([1,2,3])
    dimarray: 2 non-null elements (1 null)
    dimensions: 'x0'
    0 / x0 (3): 1 to 3
    array([ 3., nan, 4.])

    Or replace with anything else, like -9999
    >>> b.reindex_axis([1,2,3], fill_value=-9999)
    dimarray: 3 non-null elements (0 null)
    dimensions: 'x0'
    0 / x0 (3): 1 to 3
    array([ 3, -9999, 4])

    "nearest" mode
    >>> b.reindex_axis([0,1,2,3], method='nearest') # out-of-bound to NaN
    dimarray: 3 non-null elements (1 null)
    dimensions: 'x0'
    0 / x0 (4): 0 to 3
    array([ nan, 3., 3., 4.])

    "interp" mode
    >>> b.reindex_axis([0,1,2,3], method='interp') # out-of-bound to NaN
    dimarray: 3 non-null elements (1 null)
    dimensions: 'x0'
    0 / x0 (4): 0 to 3
    array([ nan, 3. , 3.5, 4. ])
    """
    if isinstance(values, Axis):
        newaxis = values
        values = newaxis.values
        axis = newaxis.name

    axis_id = self.axes.get_idx(axis)
    ax = self.axes[axis_id]  # Axis object
    axis_nm = ax.name

    # do nothing if axis is same or only None element.
    # The shapes are compared before the values: an elementwise `==`
    # would broadcast (one value could "match" a whole axis) and fails on
    # current numpy when the lengths differ
    if ax.values[0] is None or np.array_equal(values, ax.values):
        return self

    # check whether pandas can be used for re-indexing
    if use_pandas is None:
        use_pandas = get_option('optim.use_pandas')

    # ...any special option activated?
    if (method != 'exact' or tol is not None
            or ax.tol is not None or ax.modulo is not None
            or self.ndim > 4):  # pandas defined up to 4-D
        use_pandas = False

    # ...is pandas installed?
    try:
        import pandas  # noqa: F401 (availability check only)
    except ImportError:
        use_pandas = False

    # re-index using pandas
    if use_pandas:
        pandasobj = self.to_pandas()
        # NOTE(review): reindex_axis no longer exists in recent pandas
        # releases -- confirm the supported pandas versions
        newpandas = pandasobj.reindex_axis(values, axis=axis_id, fill_value=fill_value)
        newobj = self.from_pandas(newpandas)  # use class method from_pandas
        newobj._metadata = self._metadata  # add metadata back
        newobj.axes[axis_id].name = axis_nm  # give back original name

    # indices along which to sample
    elif method == "exact":
        newobj = take_na(self, values, axis=axis, repna=repna, fill_value=fill_value)

    elif method in ("nearest", "interp"):
        # NOTE(review): implicit relative import, only resolved as a
        # sibling module under Python 2
        from interpolation import interp
        newobj = interp(self, values, axis=axis, method=method, repna=repna)

    else:
        raise ValueError("invalid reindex_axis method: " + repr(method))

    return newobj