Пример #1
0
def finalize_save(dataset, cfmeta = True, pack = None):
# {{{
  """
  Prepare a dataset for saving: optionally pack variables, then optionally
  encode CF metadata.

  Parameters
  ----------
  dataset : dataset-like
    The data to be saved.  When packing is requested, this object must
    provide ``vars`` and ``atts`` attributes.
  cfmeta : bool, optional
    Only the exact value ``True`` (the default) routes the result through
    ``pygeode.formats.cfmeta.encode_cf``.  Any other value returns
    ``asdataset(...)`` of the (possibly packed) dataset instead.
  pack : optional
    Controls which variables get wrapped in ``PackVar``:

    * falsy (default) -- nothing is packed;
    * a single string -- only the variable with exactly that name;
    * any other object with ``__len__`` -- variables whose name is
      contained in it;
    * any other truthy value -- every variable.

  Returns
  -------
  The result of ``encode_cf`` or ``asdataset``, as selected by ``cfmeta``.
  """
  from pygeode.formats import cfmeta as cf
  from pygeode.dataset import asdataset

  # Only pack if pack is true
  if pack:
    # A bare string also has __len__, and `name in "string"` is a substring
    # test -- so pack='temp' would wrongly match a variable called 't'.
    # Treat a string as one variable name.
    names = (pack,) if isinstance(pack, str) else pack
    if hasattr(names, '__len__'): # Assume this is a list of variables to pack
      packed = [PackVar(v) if v.name in names else v for v in dataset.vars]
    else:
      packed = [PackVar(v) for v in dataset.vars]
    dset = asdataset(packed)
    # The rebuilt dataset gets its own copy of the attributes.
    dset.atts = dataset.atts.copy()
  else:
    dset = dataset

  # Encode standard axes back into netcdf metadata?
  if cfmeta is True:
    return cf.encode_cf(dset)
  else:
    return asdataset(dset)
Пример #2
0
 def __init__(self, dataset):
     """Store the CF-encoded form of ``dataset`` on the instance."""
     from pygeode.formats.cfmeta import encode_cf
     # The local name is rebound too, so it also refers to the encoded
     # result from here on.
     encoded = encode_cf(dataset)
     self.dataset = encoded
     dataset = encoded
Пример #3
0
 def __init__(self, dataset):
   """Run ``dataset`` through ``encode_cf`` and keep the result as ``self.dataset``."""
   from pygeode.formats.cfmeta import encode_cf
   # Encode once; both the attribute and the local name point at the result.
   cf_dataset = encode_cf(dataset)
   self.dataset = cf_dataset
   dataset = cf_dataset
Пример #4
0
def to_xarray(dataset):
    """
    Convert a PyGeode Dataset into an xarray Dataset.

    Every axis and variable of the CF-encoded dataset becomes an
    ``xarray.DataArray``.  Objects that expose a ``values`` attribute are
    wrapped directly; all others are wrapped as dask arrays whose chunks
    follow the pieces produced by ``View.loop_mem()``.

    Parameters
    ----------
    dataset : pygeode.Dataset
        The dataset to be converted (it is passed through ``asdataset``
        first).

    Returns
    -------
    out : xarray.Dataset
        The converted dataset, after ``xr.conventions.decode_cf``.
    """
    from pygeode.dataset import asdataset
    from pygeode.formats.cfmeta import encode_cf
    from pygeode.view import View
    from dask.base import tokenize
    import dask.array as da
    import xarray as xr

    # CF-encode on the PyGeode side; xarray decodes it again at the end.
    encoded = encode_cf(asdataset(dataset))

    arrays = {}
    for var in list(encoded.axes) + list(encoded.vars):
        # Unique dask name for this variable.
        dask_name = var.name + "-" + tokenize(var)
        dim_names = [axis.name for axis in var.axes]

        if hasattr(var, 'values'):
            # Values are already available: no dask graph needed.
            data = var.values
        else:
            # For each axis, the distinct index tuples seen so far, in order
            # of first appearance.  loop_mem() does not report its chunking,
            # so it is reconstructed from the views it yields.
            seen_per_axis = [[] for _ in var.axes]
            graph = {}
            for piece in View(var.axes).loop_mem():
                key = [dask_name]
                index_tuples = map(tuple, piece.integer_indices)
                for seen, indices in zip(seen_per_axis, index_tuples):
                    if indices not in seen:
                        seen.append(indices)
                    # Position of this piece along the axis = block index.
                    key.append(seen.index(indices))
                # dask task: call var.getview(piece, False) on demand.
                graph[tuple(key)] = (var.getview, piece, False)

            # Chunk sizes are the lengths of the index tuples on each axis.
            chunk_sizes = [[len(indices) for indices in seen]
                           for seen in seen_per_axis]
            data = da.Array(graph, dask_name, chunk_sizes, dtype=var.dtype)

        # Attach metadata and named dimensions.
        arrays[var.name] = xr.DataArray(
            data, dims=dim_names, attrs=var.atts, name=var.name)

    # Assemble the dataset, then let xarray decode the CF metadata.
    result = xr.Dataset(arrays, attrs=encoded.atts)
    return xr.conventions.decode_cf(result)
Пример #5
0
def to_xarray(dataset):
  """
  Converts a PyGeode Dataset into an xarray Dataset.

  Each axis and variable of the CF-encoded dataset is wrapped in an
  xarray.DataArray.  Objects with a ``values`` attribute are wrapped
  directly; the rest become dask arrays chunked according to the pieces
  yielded by ``View.loop_mem()``.

  Parameters
  ----------
  dataset : pygeode.Dataset
    The dataset to be converted.

  Returns
  -------
  out : xarray.Dataset
    An object which can be used with the xarray package.
  """
  from pygeode.dataset import asdataset
  from pygeode.formats.cfmeta import encode_cf
  from pygeode.view import View
  from dask.base import tokenize
  import dask.array as da
  import xarray as xr
  dataset = asdataset(dataset)
  # Encode the axes/variables with CF metadata.
  dataset = encode_cf(dataset)
  out = dict()
  # Loop over each axis and variable.
  for var in list(dataset.axes) + list(dataset.vars):
    # Generate a unique name to identify it with dask.
    name = var.name + "-" + tokenize(var)
    dsk = dict()
    dims = [a.name for a in var.axes]

    # Special case: already have the values in memory.
    if hasattr(var, 'values'):
      out[var.name] = xr.DataArray(var.values, dims=dims, attrs=var.atts, name=var.name)
      continue

    # Keep track of all the slices that were made over each dimension.
    # This information will be used to determine the "chunking" that was done
    # on the variable from inview.loop_mem().
    slice_order = [[] for a in var.axes]
    # Break up the variable into portions that are small enough to fit
    # in memory.  These will become the "chunks" for dask.
    inview = View(var.axes)
    for outview in inview.loop_mem():
      # Must be a real list: it is iterated twice below, and on Python 3 a
      # bare map() would be exhausted after the first pass, leaving every
      # chunk key without its block indices.
      integer_indices = [tuple(ind) for ind in outview.integer_indices]
      # Determine *how* loop_mem is splitting the axes, and define the chunk
      # sizes accordingly.
      # A little indirect, but loop_mem doesn't make its chunking choices
      # available to the caller.
      for o, sl in zip(slice_order, integer_indices):
        if sl not in o:
          o.append(sl)
      ind = [o.index(sl) for o, sl in zip(slice_order, integer_indices)]
      # Add this chunk to the dask array.
      key = tuple([name] + ind)
      # dask task tuple: var.getview(outview, False) is called on demand.
      dsk[key] = (var.getview, outview, False)
    # Construct the dask array.  Chunk sizes must be concrete sequences
    # (not lazy map objects), one tuple of block lengths per dimension.
    chunks = tuple(tuple(len(sl) for sl in o) for o in slice_order)
    arr = da.Array(dsk, name, chunks, dtype=var.dtype)
    # Wrap this into an xarray.DataArray (with metadata and named axes).
    out[var.name] = xr.DataArray(arr, dims=dims, attrs=var.atts, name=var.name)
  # Build the final xarray.Dataset.
  out = xr.Dataset(out, attrs=dataset.atts)
  # Re-decode the CF metadata on the xarray side.
  out = xr.conventions.decode_cf(out)
  return out