Exemplo n.º 1
0
def density(dataset, salinity, temperature, pressure):
    """Calculate in-situ density.

    This function calculates in-situ density from absolute salinity and
    conservative temperature, using the `gsw.rho` function. Returns a new
    sequence with the data.

    ``dataset`` is searched with ``walk`` and the first ``SequenceType`` found
    is used. ``salinity``, ``temperature`` and ``pressure`` are variables
    whose ``name`` selects the matching columns of that sequence.

    The result is a new ``SequenceType`` called "result" holding a single
    child "rho" (units "kg/m**3"). A ``ConstraintExpressionError`` is raised
    when the dataset holds no sequence.

    """
    # find sequence
    for sequence in walk(dataset, SequenceType):
        break
    else:
        # the message names this function (it used to say "bounds")
        raise ConstraintExpressionError(
            'Function "density" should be used on a Sequence.')

    # pull the three requested columns and load them into a record array so
    # they can be handed to gsw as plain numpy columns
    selection = sequence[salinity.name, temperature.name, pressure.name]
    rows = [tuple(row) for row in selection]
    data = np.rec.fromrecords(rows,
                              names=['salinity', 'temperature', 'pressure'])
    rho = gsw.rho(data['salinity'], data['temperature'], data['pressure'])

    # wrap the computed values in a one-column sequence
    out = SequenceType("result")
    out['rho'] = BaseType("rho", units="kg/m**3")
    out.data = np.rec.fromrecords(rho.reshape(-1, 1), names=['rho'])
    return out
Exemplo n.º 2
0
def mean(dataset, var, axis=0):
    """Calculate the mean of an array along a given axis.

    The input variable should be either a ``GridType`` or ``BaseType``. The
    function will return an object of the same type with the mean applied.

    ``dataset`` is not used by the function body. ``axis`` is converted with
    ``int``; negative values count from the last dimension, as in numpy. A
    ``ConstraintExpressionError`` is raised for any other kind of variable.

    """
    if not isinstance(var, (GridType, BaseType)):
        raise ConstraintExpressionError(
            'Function "mean" should be used on an array or grid.')

    axis = int(axis)
    names = var.dimensions

    # np.mean accepts negative axes, but the name filter below compares plain
    # positions; translate the axis so the dimension removed from the names is
    # the one averaged out of the data
    dropped = axis + len(names) if axis < 0 else axis
    dims = tuple(dim for i, dim in enumerate(names) if i != dropped)

    # process basetype
    if isinstance(var, BaseType):
        return BaseType(name=var.name,
                        data=np.mean(var.data[:], axis=axis),
                        dimensions=dims,
                        attributes=var.attributes)

    # process grid: average the array, then copy over the remaining maps
    out = GridType(name=var.name, attributes=var.attributes)
    out[var.array.name] = BaseType(name=var.array.name,
                                   data=np.mean(var.array.data[:], axis=axis),
                                   dimensions=dims,
                                   attributes=var.array.attributes)
    for dim in dims:
        out[dim] = BaseType(name=dim,
                            data=var[dim].data[:],
                            dimensions=(dim, ),
                            attributes=var[dim].attributes)
    return out
Exemplo n.º 3
0
def mean(dataset, var, axis=0):
    """
    Version 1.0

    Calculates the mean of an array along a given axis.

    ``var`` must be a ``BaseType`` or a ``GridType``; an object of the same
    type is returned with the mean applied. Anything else raises a
    ``ConstraintExpressionError``. ``dataset`` is not used by the function
    body. ``axis`` is converted with ``int``; negative values count from the
    last dimension, as in numpy.

    """
    # validate before touching ``var.dimensions`` so that unsupported
    # variables get the intended error instead of an unrelated failure
    if not isinstance(var, (BaseType, GridType)):
        raise ConstraintExpressionError(
            'Function "mean" should be used on an array or grid.')

    axis = int(axis)
    names = var.dimensions

    # np.mean accepts negative axes, but the name filter below compares plain
    # positions; translate the axis so the dimension removed from the names is
    # the one averaged out of the data
    dropped = axis + len(names) if axis < 0 else axis
    dims = tuple(dim for i, dim in enumerate(names) if i != dropped)

    if isinstance(var, BaseType):
        return BaseType(name=var.name,
                        data=np.mean(var.data[:], axis=axis),
                        dimensions=dims,
                        attributes=var.attributes)

    # grid: average the array, then copy over the remaining maps
    out = GridType(name=var.name, attributes=var.attributes)
    out[var.array.name] = BaseType(name=var.array.name,
                                   data=np.mean(var.array.data[:],
                                                axis=axis),
                                   dimensions=dims,
                                   attributes=var.array.attributes)
    for dim in dims:
        out[dim] = BaseType(name=dim,
                            data=var[dim].data[:],
                            dimensions=(dim, ),
                            attributes=var[dim].attributes)
    return out
Exemplo n.º 4
0
def bounds(dataset, xmin, xmax, ymin, ymax, zmin, zmax, tmin, tmax):
    r"""Bound a sequence in space and time.

    This function is used by GrADS to access Sequences, eg:

        http://server.example.com/dataset.dods?sequence& \
                bounds(0,360,-90,90,500,500,00Z01JAN1970,00Z01JAN1970)

    We assume the dataset has only a single Sequence, which will be returned
    modified in place.

    Children of the sequence are matched through their ``axis`` attribute
    (case-insensitive "x", "y", "z" or "t"). For the spatial axes, equal
    bounds select by equality and different bounds select the closed
    interval. ``tmin``/``tmax`` are parsed with the format ``%HZ%d%b%Y`` and
    converted to the child's ``units`` (default "seconds since 1970-01-01").
    When both instants are equal and the child has a ``grads_step``
    attribute, the half-open interval ``[start, start + step)`` is used.

    Raises ``ConstraintExpressionError`` if the dataset has no sequence.

    """
    # find sequence
    for sequence in walk(dataset, SequenceType):
        break  # get first sequence
    else:
        raise ConstraintExpressionError(
            'Function "bounds" should be used on a Sequence.')

    # limits for the axes that share the same selection logic
    spatial = {'x': (xmin, xmax), 'y': (ymin, ymax), 'z': (zmin, zmax)}

    for child in sequence.children():
        axis = child.attributes.get('axis', '').lower()

        if axis in spatial:
            lower, upper = spatial[axis]
            if lower == upper:
                sequence.data = sequence[child == lower].data
            else:
                sequence.data = sequence[
                    (child >= lower) & (child <= upper)].data

        elif axis == 't':
            # parsed into locals: the ``tmin``/``tmax`` arguments must stay
            # untouched so that every time axis sees the original strings
            start = datetime.strptime(tmin, '%HZ%d%b%Y')
            end = datetime.strptime(tmax, '%HZ%d%b%Y')
            units = child.attributes.get('units', 'seconds since 1970-01-01')

            # if start and end are equal, add the step
            if start == end and 'grads_step' in child.attributes:
                end = start + parse_step(child.attributes['grads_step'])
                first = coards.format(start, units)
                last = coards.format(end, units)
                # the end of the step belongs to the next interval
                sequence.data = sequence[
                    (child >= first) & (child < last)].data
            else:
                first = coards.format(start, units)
                last = coards.format(end, units)
                sequence.data = sequence[
                    (child >= first) & (child <= last)].data

    return sequence
Exemplo n.º 5
0
def apply_projection(projection, dataset):
    """Apply a given projection to a dataset.

    This function builds and returns a new dataset by adding those variables
    that were requested on the projection.

    ``projection`` is iterated as a collection of paths; each path is a
    sequence of ``(name, slice_)`` pairs leading from the dataset root to the
    requested variable. The work is done in three passes: collect the
    requested variables into a new ``DatasetType``, restrict the data of
    every sequence to the children that were kept, and finally apply the
    slices.

    Raises ``ConstraintExpressionError`` when a non-empty slice is given for
    a variable that is not a ``BaseType``, ``SequenceType`` or ``GridType``.

    """
    # the result shares the name and attributes of the source dataset
    out = DatasetType(name=dataset.name, attributes=dataset.attributes)

    # first collect all the variables
    for p in projection:
        # ``target`` walks down the output while ``template`` walks down the
        # source dataset, one path component at a time
        target, template = out, dataset
        for i, (name, slice_) in enumerate(p):
            candidate = template[name]

            # add variable to target
            if isinstance(candidate, StructureType):
                if name not in target.keys():
                    if i < len(p) - 1:
                        # if there are more children to add we need to clear
                        # the candidate so it has only explicitly added
                        # children; also, Grids are degenerated into Structures
                        if isinstance(candidate, GridType):
                            candidate = StructureType(candidate.name,
                                                      candidate.attributes)
                        # NOTE(review): for non-Grid candidates this clears
                        # the key list of the object returned by
                        # ``template[name]``; unless that lookup returns a
                        # copy, the source dataset is affected -- confirm
                        candidate._keys = []
                    target[name] = candidate
                # descend one level on both sides for the next component
                target, template = target[name], template[name]
            else:
                # non-structure variables are added as they are
                target[name] = candidate

    # fix sequence data to include only variables that are in the sequence
    for seq in walk(out, SequenceType):
        # re-read the data from the source variable with the same id,
        # selecting only the keys present in the output sequence
        seq.data = get_var(dataset, seq.id)[tuple(seq.keys())].data

    # apply slices
    for p in projection:
        target = out
        for name, slice_ in p:
            # keep the parent so that sliced sequences/grids can replace
            # the entry they came from
            target, parent = target[name], target

            # empty slices leave the variable untouched
            if slice_:
                if isinstance(target, BaseType):
                    target.data = target[slice_]
                elif isinstance(target, SequenceType):
                    # only the first slice is used for sequences
                    parent[name] = target[slice_[0]]
                elif isinstance(target, GridType):
                    parent[name] = target[slice_]
                else:
                    raise ConstraintExpressionError("Invalid projection!")

    return out
Exemplo n.º 6
0
def fix_shorthand(projection, dataset):
    """
    Fix shorthand notation in the projection.

    Some clients request variables by their name, not by the id. This is called
    the "shorthand notation", and it has to be fixed.

    Every entry of ``projection`` is a list of ``(name, slice)`` pairs. An
    entry with a single pair whose name is not a top-level key of ``dataset``
    is looked up by name among everything returned by ``walk(dataset)`` and
    replaced by the full path, built from the dotted id of the match; the
    parents get an empty slice and the variable keeps the requested one. A
    name that matches nothing produces an empty entry. All other entries are
    passed through unchanged.

    The input ``projection`` is not modified; a new list is returned.

    Raises ``ConstraintExpressionError`` when the name matches more than one
    variable.

    """
    out = []
    for var in projection:
        if len(var) == 1 and var[0][0] not in dataset.keys():
            # read the pair without popping it, so that the projection
            # handed in by the caller is left intact
            token, slice_ = var[0]
            resolved = None
            for child in walk(dataset):
                if token == child.name:
                    if resolved is not None:
                        raise ConstraintExpressionError(
                            'Ambiguous shorthand notation request: %s' % token)
                    resolved = [(parent, ()) for parent in
                                child.id.split('.')[:-1]] + [(token, slice_)]
            var = [] if resolved is None else resolved
        out.append(var)
    return out
Exemplo n.º 7
0
def fix_shn(projection, dataset):
    """
    Fix shorthand notation for variables.

    Shorthand notation is the syntax some clients use to retrieve data
    using the variable name instead of its fully qualified id.

    Here ``projection`` is a list as returned by ``parse_qs``.

    An entry holding a single ``(name, slice)`` pair whose name is not found
    directly in ``dataset`` is looked up by name among everything returned by
    ``walk(dataset)`` and replaced by the full path, built from the dotted id
    of the match; the parents get an empty slice and the variable keeps the
    requested one. A name that matches nothing produces an empty entry. All
    other entries are passed through unchanged.

    The input ``projection`` is not modified; a new list is returned.

    Raises ``ConstraintExpressionError`` when the name matches more than one
    variable.

    """
    out = []
    for var in projection:
        if len(var) == 1 and var[0][0] not in dataset:
            # read the pair without popping it, so that the projection
            # handed in by the caller is left intact
            token, slice_ = var[0]
            resolved = None
            for child in walk(dataset):
                if token == child.name:
                    if resolved is not None:
                        raise ConstraintExpressionError(
                            "Ambiguous shorthand notation request: %s" % token)
                    resolved = [(parent, ())
                                for parent in child.id.split('.')[:-1]
                                ] + [(token, slice_)]
            var = [] if resolved is None else resolved
        out.append(var)
    return out
Exemplo n.º 8
0
def parse_hyperslab(hyperslab):
    """Turn a hyperslab string into a tuple of Python slices.

    The first and last characters of ``hyperslab`` are dropped and the rest
    is split on ``][``. Each non-empty piece has the form ``start``,
    ``start:stop`` or ``start:step:stop`` with integer fields. The stop given
    in the hyperslab is inclusive, so one is added to obtain the Python
    slice. A piece with more than three fields raises
    ``ConstraintExpressionError``.

    """
    slices = []
    for piece in filter(None, hyperslab[1:-1].split('][')):
        fields = [int(text) for text in piece.split(':')]
        count = len(fields)
        if count > 3:
            raise ConstraintExpressionError("Invalid hyperslab %s" % hyperslab)

        # the first field is always the start and the last one the inclusive
        # stop (for a single field they coincide); the step, when present,
        # sits in the middle
        first, last = fields[0], fields[-1]
        stride = fields[1] if count == 3 else 1
        slices.append(slice(first, last + 1, stride))

    return tuple(slices)
Exemplo n.º 9
0
def build_filter(expression, template):
    """Return a filter function based on a comparison expression.

    ``expression`` is converted to a string and split at the first comparison
    operator (``<=``, ``>=``, ``!=``, ``=~``, ``>``, ``<`` or ``=``). The left
    side must be the id of a variable below ``template``; the right side is
    either another variable with the same parent or a Python literal.

    A pair ``(f, m)`` is returned. When the left variable is a direct child
    of ``template``, ``f`` evaluates the comparison on a row and ``m`` returns
    the row unchanged. When it lives in a nested sequence, ``f`` is ``bool``
    and ``m`` returns a copy of the row in which the nested data holds only
    the rows that satisfy the comparison.

    Raises ``ConstraintExpressionError`` if the left side cannot be resolved
    or the right side is neither a sibling variable nor a valid literal.

    """
    id1, op, id2 = re.split('(<=|>=|!=|=~|>|<|=)', str(expression), 1)

    # calculate the column index we're filtering and how deep it is
    try:
        # make the id relative to the template
        id1 = id1[len(template.id) + 1:]
        target = template
        for level, token in enumerate(id1.split(".")):
            parent1 = target.id
            keys = getattr(target, "_original_keys", target._keys)
            col = keys.index(token)
            target = target[token]
        a = operator.itemgetter(col)
    # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
    # and SystemExit are not turned into constraint errors
    except Exception:
        raise ConstraintExpressionError(
            'Invalid constraint expression: "{expression}" '
            '("{id}" is not a valid variable)'.format(expression=expression,
                                                      id=id1))

    # if we're comparing two variables they must be on the same sequence, so
    # ``parent1`` must be equal to ``parent2``
    if id2.rsplit(".", 1)[0] == parent1:  # parent2 == parent1
        keys = getattr(template, "_original_keys", template._keys)
        col = keys.index(id2.split(".")[-1])
        b = operator.itemgetter(col)
    else:
        # otherwise the right side must be a literal, compared as a constant
        try:
            value = ast.literal_eval(id2)

            def b(row):
                return value
        except Exception:
            raise ConstraintExpressionError(
                'Invalid constraint expression: "{expression}" '
                '("{id}" is not valid)'.format(expression=expression, id=id2))

    op = {
        '<': operator.lt,
        '>': operator.gt,
        '!=': operator.ne,
        '=': operator.eq,
        '>=': operator.ge,
        '<=': operator.le,
        # the right side is the pattern, the left side the text to match
        '=~': lambda a, b: re.match(b, a),
    }[op]

    # if the filtering is applied in the outermost sequence we can simply pass
    # a filter, and ignore the map
    if level == 0:

        def f(row):
            return op(a(row), b(row))

        def m(row):
            return row

    # if the filtering is applied to a nested sequence we actually need to map
    # the outer data so that the inner data is filtered
    else:
        f = bool

        def recurse(row, tokens, target):
            token = tokens.pop(0)

            # return the filtered inner data
            if not tokens:
                return [col for col in row if op(a(col), b(col))]

            # navigate inside the sequence
            col = target.keys().index(token)
            target = target[col]

            # modify data in place; we need to convert tuple to list
            row = list(row)
            row[col] = recurse(row[col], tokens, target)
            return tuple(row)

        def m(row):
            # a fresh token list per row, since ``recurse`` consumes it
            tokens = id1.split(".")
            return recurse(row, tokens, template)

    return f, m
Exemplo n.º 10
0
 def __or__(self, other):
     """Reject the ``|`` operator by always raising an error."""
     message = "OR constraints not allowed in the Opendap specification."
     raise ConstraintExpressionError(message)