def test_find_missval():
    grid = GridType("my_grid")

    assert find_missval(grid) is None

    grid.attributes["missing_value"] = np.array([9999])
    assert find_missval(grid) == 9999

    # _FillValue should take precedence over missing_value
    grid.attributes["_FillValue"] = np.array([0])
    assert find_missval(grid) == 0
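A minimal sketch of what find_missval could look like, consistent with the test above (the real helper lives in pydap's netCDF response code; this version is only an assumption):

def find_missval(var):
    """Return the missing value of a variable, preferring _FillValue."""
    # _FillValue is checked last so it overrides missing_value, as the test expects.
    missval = None
    if 'missing_value' in var.attributes:
        missval = var.attributes['missing_value'][0]
    if '_FillValue' in var.attributes:
        missval = var.attributes['_FillValue'][0]
    return missval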
Example No. 2
def simple_grid_dataset():
    """
    @brief Create a simple DAP grid dataset.
    Uses the pydap interface directly; passing dicts does not make sense here.
    """
    # Convert metadata and data to a dap dataset
    ds = DatasetType(name='SimpleGridData')

    g = GridType(name='grid')
    data = numpy.arange(24.)
    data.shape = (4, 2, 3)
    # The name in the dictionary must match the name in the basetype
    g['a'] = BaseType(name='a',
                      data=data,
                      shape=data.shape,
                      type=Float32,
                      dimensions=('time', 'x', 'y'))
    g['time'] = BaseType(name='time',
                         data=numpy.arange(4.),
                         shape=(4, ),
                         type=Float64)
    g['x'] = BaseType(name='x',
                      data=numpy.arange(2.),
                      shape=(2, ),
                      type=Float64)
    g['y'] = BaseType(name='y',
                      data=numpy.arange(3.),
                      shape=(3, ),
                      type=Float64)

    ds[g.name] = g
    return ds
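A short usage sketch (assumed, not part of the original source), relying only on the mapping-style access already shown above:

ds = simple_grid_dataset()
g = ds['grid']
print(g['a'].shape)    # (4, 2, 3)
print(g['time'].data)  # [0. 1. 2. 3.]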
Example No. 3
    def stream(self):
        sz = 10

        time = numpy.arange(float(self.index), float(self.index + sz))
        self.index += sz

        data = numpy.arange(float(sz))
        for ind in range(sz):
            data[ind] = numpy.random.random()

        ds = DatasetType(name='SimpleGridData')
        g = GridType(name='Time Series')

        # The name in the dictionary must match the name in the basetype
        g['timeseries'] = BaseType(name='timeseries',
                                   data=data,
                                   shape=data.shape,
                                   type=Float32,
                                   dimensions=('time', ))  # a tuple, not a bare string
        g['time'] = BaseType(name='time',
                             data=time,
                             shape=(sz, ),
                             type=Float32)

        ds[g.name] = g

        msg = dap_tools.ds2dap_msg(ds)

        yield self.send(self.deliver, 'data', msg.encode())
Example No. 4
def mean(dataset, var, axis=0):
    """Calculate the mean of an array along a given axis.

    The input variable should be either a ``GridType`` or ``BaseType``. The
    function will return an object of the same type with the mean applied.

    """
    if not isinstance(var, (GridType, BaseType)):
        raise ConstraintExpressionError(
            'Function "mean" should be used on an array or grid.')

    axis = int(axis)
    dims = tuple(dim for i, dim in enumerate(var.dimensions) if i != axis)

    # process basetype
    if isinstance(var, BaseType):
        return BaseType(name=var.name,
                        data=np.mean(var.data[:], axis=axis),
                        dimensions=dims,
                        attributes=var.attributes)

    # process grid
    out = GridType(name=var.name, attributes=var.attributes)
    out[var.array.name] = BaseType(name=var.array.name,
                                   data=np.mean(var.array.data[:], axis=axis),
                                   dimensions=dims,
                                   attributes=var.array.attributes)
    for dim in dims:
        out[dim] = BaseType(name=dim,
                            data=var[dim].data[:],
                            dimensions=(dim, ),
                            attributes=var[dim].attributes)
    return out
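A hedged usage sketch for this server-side mean function; the grid construction mirrors the pydap.model calls used elsewhere on this page, and all names here are illustrative:

import numpy as np
from pydap.model import GridType, BaseType

g = GridType('example')
g['a'] = BaseType('a', data=np.arange(6.).reshape(2, 3), dimensions=('x', 'y'))
g['x'] = BaseType('x', data=np.arange(2.))
g['y'] = BaseType('y', data=np.arange(3.))

averaged = mean(None, g, axis=0)  # the dataset argument is not used by mean()
print(averaged.array.data)        # [1.5 2.5 3.5]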
Example No. 5
def gridtype_example():
    """Create a simple grid."""
    example = GridType("example")
    example["a"] = BaseType("a", data=np.arange(30*50).reshape(30, 50))
    example["x"] = BaseType("x", data=np.arange(30))
    example["y"] = BaseType("y", data=np.arange(50))
    return example
Example No. 6
    def setUp(self):

        sz = 12
        time = numpy.arange(float(0), float(sz))
        data = numpy.arange(float(sz))
        for ind in range(sz):
            data[ind] = numpy.random.random()

        ds = DatasetType(name='SimpleGridData')
        g = GridType(name='TimeSeries')

        # The name in the dictionary must match the name in the basetype
        g['timeseries'] = BaseType(name='timeseries',
                                   data=data,
                                   shape=data.shape,
                                   type=Float32,
                                   dimensions=('time', ))
        g['time'] = BaseType(name='time',
                             data=time,
                             shape=(sz, ),
                             type=Float32)

        ds[g.name] = g

        self.ds1 = ds

        self.tc = timeseries_consumer.TimeseriesConsumer()
        yield self.tc.plc_init()
Example No. 7
    def setUp(self):
        """Create a simple grid."""
        example = GridType("example")
        example["a"] = BaseType("a", data=np.arange(30 * 50).reshape(30, 50))
        example["x"] = BaseType("x", data=np.arange(30))
        example["y"] = BaseType("y", data=np.arange(50))

        self.example = example
Example No. 8
    def grid(self):
        """Parse a DAP grid, returning a ``GridType``."""
        grid = GridType('nameless')
        self.consume('grid')
        self.consume('{')

        self.consume('array')
        self.consume(':')
        array = self.base()
        grid[array.name] = array

        self.consume('maps')
        self.consume(':')
        while not self.peek('}'):
            var = self.base()
            grid[var.name] = var
        self.consume('}')

        grid.name = quote(self.consume('[^;]+'))
        self.consume(';')

        return grid
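The same grid syntax can be parsed end-to-end from a DDS string; a hedged sketch, assuming pydap's build_dataset helper (pydap.parsers.dds) and a hand-written DDS:

from pydap.parsers.dds import build_dataset

DDS = """Dataset {
    Grid {
        Array:
            Float32 rain[y = 2][x = 3];
        Maps:
            Int32 y[y = 2];
            Int32 x[x = 3];
    } rain;
} test;"""

dataset = build_dataset(DDS)
print(type(dataset['rain']))          # GridType
print(dataset['rain']['rain'].shape)  # (2, 3)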
Example No. 10
def demo_dataset():
    """
    @brief Example method for creating a dataset.
    http://pydap.org/developer.html#the-dap-data-model
    """

    # Create a dataset object
    ds = DatasetType(name='Mine')

    # Add some attributes
    ds.attributes['history'] = 'David made a dataset'
    ds.attributes['conventions'] = 'OOIs special format'

    # Create some data and put it in a variable
    varname = 'var1'
    data = (1, 2, 3, 4, 5, 8)
    shape = (6, )  # must match the length of the data tuple
    type = Int32
    dims = ('time', )
    attributes = {'long_name': 'long variable name one'}
    ds[varname] = BaseType(name=varname,
                           data=data,
                           shape=shape,
                           dimensions=dims,
                           type=type,
                           attributes=attributes)

    # Now make a grid data object
    g = GridType(name='g')
    data = numpy.arange(6.)
    data.shape = (2, 3)
    # The name in the dictionary must match the name in the basetype
    g['a'] = BaseType(name='a',
                      data=data,
                      shape=data.shape,
                      type=Float32,
                      dimensions=('x', 'y'))
    g['x'] = BaseType(name='x',
                      data=numpy.arange(2.),
                      shape=(2, ),
                      type=Float64)
    g['y'] = BaseType(name='y',
                      data=numpy.arange(3.),
                      shape=(3, ),
                      type=Float64)

    ds[g.name] = g

    return ds
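A small usage sketch (assumed), walking the dataset built above:

ds = demo_dataset()
print(ds.attributes['history'])  # 'David made a dataset'
print(ds['var1'].data)           # (1, 2, 3, 4, 5, 8)
print(ds['g']['a'].shape)        # (2, 3)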
Example No. 11
    def make_grid(self, response, name, data, time_data, attrs, time_attrs,
                  dims, ttype):
        grid = GridType(name=name)
        grid[name] = BaseType(name=name,
                              data=data,
                              type=ttype,
                              attributes=attrs,
                              dimensions=dims,
                              shape=data.shape)
        grid[dims[0]] = BaseType(name=dims[0],
                                 data=time_data,
                                 type=time_data.dtype.char,
                                 attributes=time_attrs,
                                 dimensions=dims,
                                 shape=time_data.shape)
        return grid
Example No. 12
    def __init__(self, filepath):
        BaseHandler.__init__(self)

        self.filepath = filepath
        try:
            with netcdf_file(self.filepath, 'r') as source:
                self.additional_headers.append(('Last-modified', (formatdate(
                    time.mktime(time.localtime(
                        os.stat(filepath)[ST_MTIME]))))))

                # shortcuts
                vars = source.variables
                dims = source.dimensions

                # build dataset
                name = os.path.split(filepath)[1]
                self.dataset = DatasetType(
                    name, attributes=dict(NC_GLOBAL=attrs(source)))
                for dim in dims:
                    if dims[dim] is None:
                        self.dataset.attributes['DODS_EXTRA'] = {
                            'Unlimited_Dimension': dim,
                        }
                        break

                # add grids
                grids = [var for var in vars if var not in dims]
                for grid in grids:
                    self.dataset[grid] = GridType(grid, attrs(vars[grid]))
                    # add array
                    self.dataset[grid][grid] = BaseType(
                        grid, LazyVariable(source, grid, grid, self.filepath),
                        vars[grid].dimensions, attrs(vars[grid]))
                    # add maps
                    for dim in vars[grid].dimensions:
                        self.dataset[grid][dim] = BaseType(
                            dim, vars[dim][:], None, attrs(vars[dim]))

                # add dims
                for dim in dims:
                    self.dataset[dim] = BaseType(dim, vars[dim][:], None,
                                                 attrs(vars[dim]))
        except Exception as exc:
            message = 'Unable to open file %s: %s' % (filepath, exc)
            raise OpenFileError(message)
Example No. 13
        def add_variables(dataset, h5, level=0):
            assert type(h5) in (h5py.File, h5py.Group, h5py.Dataset)
            name = h5.name.lstrip('/')
            attrs = process_attrs(h5.attrs)

            # struct
            if type(h5) in (h5py.File, h5py.Group):
                foo = StructureType(name, attributes=attrs)
                name = foo.name
                dataset[name] = foo
                for bar in h5.values():
                    add_variables(dataset[name], bar, level + 1)
                return

            # Recursion base cases
            rank = len(h5.shape)
            # basetype
            if rank == 0:
                dataset[name] = BaseType(name,
                                         data=Hdf5Data(h5),
                                         dimensions=(),
                                         attributes=attrs)
            # sequence?
            #elif rank == 1:
            #    dataset[name] = SequenceType(name, data=h5, attributes=h5.attrs)
            # grid
            elif is_gridded(h5):
                parent = dataset[name] = GridType(name, attributes=attrs)
                dims = tuple([d.values()[0].name.lstrip('/') for d in h5.dims])
                logger.debug("DIMENSIONS: {}".format(dims))
                parent[name] = BaseType(
                    name, data=Hdf5Data(h5), dimensions=dims,
                    attributes=attrs)  # Add the main variable
                for dim in h5.dims:  # and all of the dimensions
                    add_variables(
                        parent, dim[0], level +
                        1)  # Why would dims have more than one h5py.Dataset?
            # BaseType
            else:
                dataset[name] = BaseType(name,
                                         data=Hdf5Data(h5),
                                         attributes=attrs)
Example No. 14
    def handle_dds(self, coverage, dataset, fields):
        cov = coverage
        try:
            time_name = coverage.temporal_parameter_name
            time_context = coverage.get_parameter_context(time_name)
            time_attrs = self.get_attrs(cov, time_name)
            time_base = BaseType(time_name,
                                 type=self.dap_type(time_context),
                                 attributes=time_attrs,
                                 dimensions=(time_name, ),
                                 shape=(coverage.num_timesteps, ))
            dataset[time_name] = time_base

        except Exception:
            log.exception('Problem reading cov %s', str(cov))
            raise  # Can't do much without time

        for var in fields:
            while var:
                name, slice_ = var.pop(0)
                name = urllib.unquote(name)
                if name == time_name:
                    continue  # Already added to the dataset
                try:
                    grid = GridType(name=name)
                    context = coverage.get_parameter_context(name)
                    attrs = self.get_attrs(cov, name)

                    grid[name] = BaseType(name=name,
                                          type=self.dap_type(context),
                                          attributes=attrs,
                                          dimensions=(time_name, ),
                                          shape=(coverage.num_timesteps, ))
                    grid[cov.temporal_parameter_name] = time_base
                    dataset[name] = grid
                except Exception:
                    log.exception('Problem reading cov %s', str(cov))
                    continue
        return dataset
Example No. 15
                                             ("float", 1000.0),
                                         ]))
SimpleStructure['types']['b'] = BaseType('b', np.array(0, np.byte))
SimpleStructure['types']['i32'] = BaseType('i32', np.array(1, np.int32))
SimpleStructure['types']['ui32'] = BaseType('ui32', np.array(0, np.uint32))
SimpleStructure['types']['i16'] = BaseType('i16', np.array(0, np.int16))
SimpleStructure['types']['ui16'] = BaseType('ui16', np.array(0, np.uint16))
SimpleStructure['types']['f32'] = BaseType('f32', np.array(0.0, np.float32))
SimpleStructure['types']['f64'] = BaseType('f64', np.array(1000., np.float64))
SimpleStructure['types']['s'] = BaseType(
    's', np.array("This is a data test string (pass 0)."))
SimpleStructure['types']['u'] = BaseType('u', np.array("http://www.dods.org"))

# test grid
rain = DatasetType('test')
rain['rain'] = GridType('rain')
rain['rain']['rain'] = BaseType('rain',
                                np.arange(6).reshape(2, 3),
                                dimensions=('y', 'x'))
rain['rain']['x'] = BaseType('x', np.arange(3), units='degrees_east')
rain['rain']['y'] = BaseType('y', np.arange(2), units='degrees_north')

# test for ``bounds`` function
bounds = DatasetType('test')
bounds['sequence'] = SequenceType('sequence')
bounds['sequence']['lon'] = BaseType('lon', axis='X')
bounds['sequence']['lat'] = BaseType('lat', axis='Y')
bounds['sequence']['depth'] = BaseType('depth', axis='Z')
bounds['sequence']['time'] = BaseType('time',
                                      axis='T',
                                      units="days since 1970-01-01")
Example No. 16
SimpleStructure['types']['ub'] = BaseType('ub', np.array(10, np.ubyte))
SimpleStructure['types']['i32'] = BaseType('i32', np.array(-10, np.int32))
SimpleStructure['types']['ui32'] = BaseType('ui32', np.array(10, np.uint32))
SimpleStructure['types']['i16'] = BaseType('i16', np.array(-10, np.int16))
SimpleStructure['types']['ui16'] = BaseType('ui16', np.array(10, np.uint16))
SimpleStructure['types']['f32'] = BaseType('f32', np.array(100.0, np.float32))
SimpleStructure['types']['f64'] = BaseType('f64', np.array(1000., np.float64))
SimpleStructure['types']['s'] = BaseType(
    's', np.array("This is a data test string (pass 0)."))
SimpleStructure['types']['u'] = BaseType('u', np.array("http://www.dods.org"))
SimpleStructure['types']['U'] = BaseType('U',
                                         np.array(u"test unicode", np.unicode))

# test grid
rain = DatasetType('test')
rain['rain'] = GridType('rain')
rain['rain']['rain'] = BaseType('rain',
                                np.arange(6).reshape(2, 3),
                                dimensions=('y', 'x'))
rain['rain']['x'] = BaseType('x', np.arange(3), units='degrees_east')
rain['rain']['y'] = BaseType('y', np.arange(2), units='degrees_north')

# test for ``bounds`` function
bounds = DatasetType('test')
bounds['sequence'] = SequenceType('sequence')
bounds['sequence']['lon'] = BaseType('lon', axis='X')
bounds['sequence']['lat'] = BaseType('lat', axis='Y')
bounds['sequence']['depth'] = BaseType('depth', axis='Z')
bounds['sequence']['time'] = BaseType('time',
                                      axis='T',
                                      units="days since 1970-01-01")
Example No. 17
def dataset_from_str(name,
                     buffer_str,
                     mtime,
                     directory='.',
                     buffer_cache={},
                     dataset_cache={},
                     mtimes={},
                     known_infiles={}):
    from fstd2nc import Buffer
    from fstd2nc.mixins import _var_type, _axis_type, _dim_type
    from pydap.model import DatasetType, GridType, BaseType
    from os.path import basename, getmtime
    import numpy as np
    from collections import OrderedDict
    from datetime import datetime
    from argparse import ArgumentParser
    from os import chdir, path
    import shlex
    from glob import glob

    # Set the directory (to properly evaluate relative paths).
    chdir(directory)
    # Parse the arguments from the string.
    parser = ArgumentParser()
    parser.add_argument('infile', nargs='+')
    Buffer._cmdline_args(parser)
    buffer_args = shlex.split(buffer_str)
    buffer_args = parser.parse_args(buffer_args)
    buffer_args = vars(buffer_args)
    infiles = buffer_args.pop('infile')
    # Apply wildcard expansion to filenames.
    infiles = [
        f for filepattern in infiles
        for f in sorted(glob(filepattern)) or [filepattern]
    ]
    # Make sure the filenames are strings (not unicode).
    infiles = list(map(str, infiles))

    # Look at modification times of individual files.
    mtime = max(map(getmtime, infiles))

    # Return a cached version of the dataset if nothing about the file(s) have
    # changed since last time.
    if (name in dataset_cache and mtime <= mtimes[name]
            and known_infiles[name] == infiles):
        return dataset_cache[name]

    mtimes[name] = mtime
    known_infiles[name] = infiles

    # Construct an fstd2nc Buffer object with the decoded FST data.
    buf = Buffer(infiles, **buffer_args)
    # Save a reference to the Buffer so the file reference(s) remain valid.
    buffer_cache[name] = buf

    # Get global metadata.
    global_metadata = dict(buf._metadata.get('global', {}))
    # Add history to global metadata.
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    history = timestamp + ": %s via Pydap+fstd2dap" % path.basename(name)
    global_metadata['history'] = history

    # Construct a pydap Dataset object.
    dataset = DatasetType(name=path.basename(name),
                          attributes=dict(NC_GLOBAL=global_metadata))
    # Save this so it can be immediately returned next time it's requested.
    dataset_cache[name] = dataset

    # Split into vars / dims.
    buf = list(buf)
    variables = OrderedDict((var.name, var) for var in buf
                            if not isinstance(var, (_axis_type, _dim_type)))
    dims = OrderedDict((var.name, var) for var in buf
                       if isinstance(var, (_axis_type, _dim_type)))

    # Based loosely on pydap's builtin netcdf handler.
    for var in variables.values():
        # Add grids.
        dataset[var.name] = GridType(var.name, var.atts)
        # Add array.
        dataset[var.name][var.name] = BaseType(var.name, var.array, var.dims,
                                               var.atts)
        # Add maps.
        for dim in var.dims:
            if dim in dims:
                if hasattr(dims[dim], 'array'):
                    array = dims[dim].array
                else:
                    # Add "dummy" dimensions (or they're not interpreted
                    # properly by some clients like Panoply).
                    array = np.arange(len(dims[dim]))
                if hasattr(dims[dim], 'atts'):
                    atts = dims[dim].atts
                else:
                    atts = {}
                dataset[var.name][dim] = BaseType(dim, array, None, atts)

    for dim in dims.values():
        if hasattr(dim, 'array'):
            array = dim.array
        else:
            # Add "dummy" dimensions (or they're not interpreted properly by some
            # clients like Panoply).
            array = np.arange(len(dim))
        if hasattr(dim, 'atts'):
            atts = dim.atts
        else:
            atts = {}
        dataset[dim.name] = BaseType(dim.name, array, None, atts)
        # Handle unlimited dimension.
        if dim.name == 'time':
            dataset.attributes['DODS_EXTRA'] = {
                'Unlimited_Dimension': dim.name,
            }

    return dataset