Example #1
0
def constrain(dataset, ce):
    """
    A constraint expression applier.

    Returns a new dataset with the constraint expression ``ce`` (projections
    and selections) applied; the input ``dataset`` is never mutated.

        >>> dataset = DatasetType(name='test')
        >>> dataset['seq'] = SequenceType(name='seq')
        >>> dataset['seq']['index'] = BaseType(name='index', type=Int32)
        >>> dataset['seq']['temperature'] = BaseType(name='temperature', type=Float32)
        >>> dataset['seq']['site'] = BaseType(name='site', type=String)

        >>> dataset['seq'].data = [
        ...         (10, 17.2, 'Diamont_St'),
        ...         (11, 15.1, 'Blacktail_Loop'),
        ...         (12, 15.3, 'Platinum_St'),
        ...         (13, 15.1, 'Kodiak_Trail')]
        >>> for struct_ in dataset.seq:
        ...     print struct_.data
        (10, 17.2, 'Diamont_St')
        (11, 15.1, 'Blacktail_Loop')
        (12, 15.3, 'Platinum_St')
        (13, 15.1, 'Kodiak_Trail')

        >>> dataset2 = constrain(dataset, 'seq.index>11')
        >>> for struct_ in dataset2.seq:
        ...     print struct_.data
        (12, 15.3, 'Platinum_St')
        (13, 15.1, 'Kodiak_Trail')
        >>> dataset2 = constrain(dataset, 'seq.index>11&seq.temperature<15.2')
        >>> for struct_ in dataset2.seq:
        ...     print struct_.data
        (13, 15.1, 'Kodiak_Trail')

        >>> dataset.clear()
        >>> dataset['casts'] = SequenceType(name='casts')
        >>> dataset['casts']['lat'] = BaseType(name='lat', type=Float32)
        >>> dataset['casts']['lon'] = BaseType(name='lon', type=Float32)
        >>> dataset['casts']['time'] = BaseType(name='time', type=Float64)
        >>> dataset['casts']['profile'] = SequenceType(name='profile')
        >>> dataset['casts']['profile']['t'] = BaseType(name='t', type=Float32)
        >>> dataset['casts']['profile']['s'] = BaseType(name='s', type=Float32)
        >>> dataset['casts']['profile']['p'] = BaseType(name='p', type=Float32)
        >>> dataset['casts'].data = [
        ...         (-10.0, 290.0, 1.0, [(21.0, 35.0, 100.0), (20.5, 34.9, 200.0), (19.0, 33.0, 300.0)]),
        ...         (-11.0, 295.0, 2.0, [(22.0, 35.5, 100.0), (21.0, 35.4, 200.0), (20.0, 33.5, 300.0), (19.0, 33.0, 500.0)])]
        >>> dataset2 = constrain(dataset, 'casts.lat>-11')
        >>> for struct_ in dataset2.casts:
        ...     print struct_.data
        (-10.0, 290.0, 1.0, array([[21.0, 35.0, 100.0],
               [20.5, 34.9, 200.0],
               [19.0, 33.0, 300.0]], dtype=object))

    Filtering is guaranteed to work only in outer sequences, but not in inner
    sequences like this::

        >>> dataset2 = constrain(dataset, 'casts.profile.p>100')
        >>> for struct_ in dataset2.casts:
        ...     print struct_.data
        (-10.0, 290.0, 1.0, array([[21.0, 35.0, 100.0],
               [20.5, 34.9, 200.0],
               [19.0, 33.0, 300.0]], dtype=object))
        (-11.0, 295.0, 2.0, array([[22.0, 35.5, 100.0],
               [21.0, 35.4, 200.0],
               [20.0, 33.5, 300.0],
               [19.0, 33.0, 500.0]], dtype=object))

    Instead, inner sequences have to be filtered inside a loop::

        >>> for struct_ in dataset.casts:
        ...     for profile in struct_.profile[ struct_.profile.p > 100 ]:
        ...         print profile.data
        (20.5, 34.9, 200.0)
        (19.0, 33.0, 300.0)
        (21.0, 35.4, 200.0)
        (20.0, 33.5, 300.0)
        (19.0, 33.0, 500.0)

    """
    projection, selection = parse_qs(ce)
    # An empty projection means "everything": project every top-level key.
    projection = projection or [[(key, ())] for key in dataset.keys()]
    projection = fix_shn(projection, dataset)

    # Work on a deep copy so the caller's dataset is never mutated.
    filtered = copy.deepcopy(dataset)

    # Filter outer (nesting level 1) sequences; inner sequences are not
    # filtered here (see the docstring).
    for seq in walk(filtered, SequenceType):
        if seq._nesting_level != 1:
            continue
        # Check only selections that apply to the direct children of this
        # sequence (ie, skip children from nested sequences).  Use a raw
        # string: a plain literal's "\." is an invalid escape sequence.
        pattern = re.compile(r"%s\.[^.]+(<=|<|>=|>|=|!=)" % re.escape(seq.id))
        filter_ = []
        for cond in selection:
            if not pattern.match(cond):
                continue
            id_, op, other = parse_selection(cond, dataset)
            filter_.append(op(id_, other))
        if filter_:
            # AND all the conditions together and apply them at once.
            seq.data = seq[reduce(lambda c1, c2: c1 & c2, filter_)].data

    # Create a new empty dataset to build it up.
    new_ = DatasetType(name=filtered.name, attributes=filtered.attributes.copy())

    for var in projection:
        target, template = new_, filtered
        while var:
            name, slice_ = var.pop(0)
            candidate = copy.deepcopy(template[name])
            if slice_:
                if isinstance(candidate, SequenceType):
                    # Sequences take only the first slice component.
                    candidate = candidate[slice_[0]]
                elif isinstance(candidate, BaseType):
                    candidate.data = candidate[slice_]
                    candidate.shape = candidate.data.shape
                else:
                    candidate = candidate[slice_]

            if isinstance(candidate, StructureType):
                if var:
                    # Convert degenerate grids into structures.
                    if isinstance(candidate, GridType):
                        candidate.__class__ = StructureType
                    # Children will be re-added as the projection descends.
                    candidate.clear()
                if name not in target or not var:
                    target[name] = candidate
                target, template = target[name], template[name]
            else:
                target[name] = candidate

    return new_
Example #2
0
def open_url(url):
    """
    Open a given dataset URL, trying different response methods.

    The function checks the stub DDX method, and falls back to the
    DDS+DAS responses. It can be easily extended for other representations
    like JSON.

    The URL should point to the dataset, omitting any response extensions
    like ``.dds``. Username and password can be passed in the URL like::

        http://user:[email protected]:port/path

    They will be transmitted as plaintext if the server supports only
    Basic authentication, so be careful. For Digest authentication this
    is safe.

    The URL can point directly to an Opendap dataset, or it can contain
    any number of constraint expressions (selection/projections)::

        http://example.com/dataset?var1,var2&var3>10

    You can also specify a cache directory, a timeout and a proxy using
    the global variables from ``pydap.lib``::

        >>> import pydap.lib
        >>> pydap.lib.TIMEOUT = 60  # seconds
        >>> pydap.lib.CACHE = '.cache'
        >>> import httplib2
        >>> from pydap.util import socks
        >>> pydap.lib.PROXY = httplib2.ProxyInfo(socks.PROXY_TYPE_HTTP, 'localhost', 8000)

    """
    # Try each response handler in order; keep the first that succeeds.
    dataset = None
    for handler in (_ddx, _ddsdas):
        candidate = handler(url)
        if candidate:
            dataset = candidate
            break
    if not dataset:
        raise ClientError("Unable to open dataset.")

    # Strip projections from the URL's query string; only selections are
    # kept in the URL handed to the data proxies below.
    scheme, netloc, path, query, fragment = urlsplit(url)
    projection, selection = parse_qs(query)
    url = urlunsplit(
            (scheme, netloc, path, '&'.join(selection), fragment))

    # Replace data with lazy proxies for BaseType and SequenceType, so
    # slicing a variable retrieves its data on-the-fly.
    for leaf in walk(dataset, BaseType):
        leaf.data = ArrayProxy(leaf.id, url, leaf.shape)
    for seq in walk(dataset, SequenceType):
        seq.data = SequenceProxy(seq.id, url)

    # Set server-side functions.
    dataset.functions = Functions(url)

    # Propagate any hyperslabs from the projection into the proxies.
    projection = fix_shn(projection, dataset)
    for tokens in projection:
        target = dataset
        while tokens:
            name, slice_ = tokens.pop(0)
            target = target[name]
            if slice_ and isinstance(target.data, VariableProxy):
                shape = getattr(target, 'shape', (sys.maxint,))
                target.data._slice = fix_slice(slice_, shape)

    return dataset
Example #3
0
def constrain(dataset, ce):
    """
    A constraint expression applier.

        >>> dataset = DatasetType(name='test')
        >>> dataset['seq'] = SequenceType(name='seq')
        >>> dataset['seq']['index'] = BaseType(name='index', type=Int32)
        >>> dataset['seq']['temperature'] = BaseType(name='temperature', type=Float32)
        >>> dataset['seq']['site'] = BaseType(name='site', type=String)

        >>> dataset['seq'].data = [
        ...         (10, 17.2, 'Diamont_St'),
        ...         (11, 15.1, 'Blacktail_Loop'),
        ...         (12, 15.3, 'Platinum_St'),
        ...         (13, 15.1, 'Kodiak_Trail')]
        >>> for struct_ in dataset.seq:
        ...     print struct_.data
        (10, 17.2, 'Diamont_St')
        (11, 15.1, 'Blacktail_Loop')
        (12, 15.3, 'Platinum_St')
        (13, 15.1, 'Kodiak_Trail')

        >>> dataset2 = constrain(dataset, 'seq.index>11')
        >>> for struct_ in dataset2.seq:
        ...     print struct_.data
        (12, 15.3, 'Platinum_St')
        (13, 15.1, 'Kodiak_Trail')
        >>> dataset2 = constrain(dataset, 'seq.index>11&seq.temperature<15.2')
        >>> for struct_ in dataset2.seq:
        ...     print struct_.data
        (13, 15.1, 'Kodiak_Trail')

        >>> dataset.clear()
        >>> dataset['casts'] = SequenceType(name='casts')
        >>> dataset['casts']['lat'] = BaseType(name='lat', type=Float32)
        >>> dataset['casts']['lon'] = BaseType(name='lon', type=Float32)
        >>> dataset['casts']['time'] = BaseType(name='time', type=Float64)
        >>> dataset['casts']['profile'] = SequenceType(name='profile')
        >>> dataset['casts']['profile']['t'] = BaseType(name='t', type=Float32)
        >>> dataset['casts']['profile']['s'] = BaseType(name='s', type=Float32)
        >>> dataset['casts']['profile']['p'] = BaseType(name='p', type=Float32)
        >>> dataset['casts'].data = [
        ...         (-10.0, 290.0, 1.0, [(21.0, 35.0, 100.0), (20.5, 34.9, 200.0), (19.0, 33.0, 300.0)]),
        ...         (-11.0, 295.0, 2.0, [(22.0, 35.5, 100.0), (21.0, 35.4, 200.0), (20.0, 33.5, 300.0), (19.0, 33.0, 500.0)])]
        >>> dataset2 = constrain(dataset, 'casts.lat>-11')
        >>> for struct_ in dataset2.casts:
        ...     print struct_.data
        (-10.0, 290.0, 1.0, array([[21.0, 35.0, 100.0],
               [20.5, 34.9, 200.0],
               [19.0, 33.0, 300.0]], dtype=object))

    Filtering is guaranteed to work only in outer sequences, but not in inner
    sequences like this::

        >>> dataset2 = constrain(dataset, 'casts.profile.p>100')
        >>> for struct_ in dataset2.casts:
        ...     print struct_.data
        (-10.0, 290.0, 1.0, array([[21.0, 35.0, 100.0],
               [20.5, 34.9, 200.0],
               [19.0, 33.0, 300.0]], dtype=object))
        (-11.0, 295.0, 2.0, array([[22.0, 35.5, 100.0],
               [21.0, 35.4, 200.0],
               [20.0, 33.5, 300.0],
               [19.0, 33.0, 500.0]], dtype=object))

    Instead, inner sequences have to be filtered inside a loop::

        >>> for struct_ in dataset.casts:
        ...     for profile in struct_.profile[ struct_.profile.p > 100 ]:
        ...         print profile.data
        (20.5, 34.9, 200.0)
        (19.0, 33.0, 300.0)
        (21.0, 35.4, 200.0)
        (20.0, 33.5, 300.0)
        (19.0, 33.0, 500.0)

    """
    projection, selection = parse_qs(ce)
    # No projection at all means the whole dataset is projected.
    if not projection:
        projection = [[(key, ())] for key in dataset.keys()]
    projection = fix_shn(projection, dataset)

    # Deep-copy so the input dataset is left untouched.
    clone = copy.deepcopy(dataset)

    # Apply selections to the outer (level-1) sequences only.
    for seq in walk(clone, SequenceType):
        if seq._nesting_level != 1:
            continue
        # Conditions that refer to a *direct* child of this sequence;
        # children of nested sequences are skipped.
        pattern = r'%s\.[^\.]+(<=|<|>=|>|=|!=)' % re.escape(seq.id)
        filters = []
        for cond in selection:
            if not re.match(pattern, cond):
                continue
            id_, op, other = parse_selection(cond, dataset)
            filters.append(op(id_, other))
        if filters:
            # AND the individual conditions together.
            combined = filters[0]
            for extra in filters[1:]:
                combined = combined & extra
            seq.data = seq[combined].data

    # Start from an empty dataset and copy the projected variables in.
    new_ = DatasetType(name=clone.name, attributes=clone.attributes.copy())

    for tokens in projection:
        target, template = new_, clone
        while tokens:
            name, slice_ = tokens.pop(0)
            candidate = copy.deepcopy(template[name])
            if slice_:
                if isinstance(candidate, SequenceType):
                    candidate = candidate[slice_[0]]
                elif isinstance(candidate, BaseType):
                    candidate.data = candidate[slice_]
                    candidate.shape = candidate.data.shape
                else:
                    candidate = candidate[slice_]

            if not isinstance(candidate, StructureType):
                target[name] = candidate
            else:
                if tokens:
                    # Convert degenerate grids into structures.
                    if isinstance(candidate, GridType):
                        candidate.__class__ = StructureType
                    candidate.clear()
                if not tokens or name not in target:
                    target[name] = candidate
                target, template = target[name], template[name]

    return new_