Exemplo n.º 1
0
    def load_slice(i):
        loc = [slice(i, i + 1), slice(None), slice(None)]
        d = GridWorkflow.load(tile[loc], **kwargs)

        if mask_nodata:
            d = sensible_mask_invalid_data(d)

        # Load all masks and combine them all into one
        mask = None
        for m_tile, flags, load_args in masks:
            m = GridWorkflow.load(m_tile[loc], **load_args)
            m, *other = m.data_vars.values()
            m = make_mask(m, **flags)

            if mask is None:
                mask = m
            else:
                mask &= m

        if mask is not None:
            # Apply mask in place if asked or if we already performed
            # conversion to float32, this avoids reallocation of memory and
            # hence increases the largest data set size one can load without
            # running out of memory
            if mask_inplace or mask_nodata:
                d = sensible_where_inplace(d, mask)
            else:
                d = sensible_where(d, mask)

        if src_idx is not None:
            d.coords['source'] = ('time', np.repeat(src_idx, d.time.size))

        return d
Exemplo n.º 2
0
def load_masked_data(sub_tile_slice: Tuple[slice, slice, slice],
                     source_prod: DataSource,
                     geom=None) -> xarray.Dataset:
    data_fuse_func = import_function(
        source_prod.spec['fuse_func']
    ) if 'fuse_func' in source_prod.spec else None
    data = GridWorkflow.load(source_prod.data[sub_tile_slice],
                             measurements=source_prod.spec.get('measurements'),
                             fuse_func=data_fuse_func,
                             skip_broken_datasets=True)

    mask_inplace = source_prod.spec.get('mask_inplace', False)
    mask_nodata = source_prod.spec.get('mask_nodata', True)

    if mask_nodata:
        data = sensible_mask_invalid_data(data)

    # if all NaN
    completely_empty = all(
        ds for ds in xarray.ufuncs.isnan(data).all().data_vars.values())
    if completely_empty:
        # Discard empty slice
        return None

    if mask_inplace or not mask_nodata:
        where = sensible_where_inplace
    else:
        where = sensible_where

    if 'masks' in source_prod.spec:
        for mask_spec, mask_tile in zip(source_prod.spec['masks'],
                                        source_prod.masks):
            if mask_tile is None:
                # Discard data due to no mask data
                return None
            mask_fuse_func = import_function(
                mask_spec['fuse_func']) if 'fuse_func' in mask_spec else None
            mask = GridWorkflow.load(
                mask_tile[sub_tile_slice],
                measurements=[mask_spec['measurement']],
                fuse_func=mask_fuse_func,
                skip_broken_datasets=True)[mask_spec['measurement']]

            data = where(data, make_mask_from_spec(mask, mask_spec))
            del mask

    if geom is not None:
        data = where(data, geometry_mask([geom], data.geobox, invert=True))

    if source_prod.source_index is not None:
        data.coords['source'] = ('time',
                                 np.repeat(source_prod.source_index,
                                           data.time.size))

    return data
Exemplo n.º 3
0
def load_masked_data(sub_tile_slice: Tuple[slice, slice, slice],
                     source_prod: DataSource) -> xarray.Dataset:
    data_fuse_func = import_function(source_prod.spec['fuse_func']) if 'fuse_func' in source_prod.spec else None
    data = GridWorkflow.load(source_prod.data[sub_tile_slice],
                             measurements=source_prod.spec.get('measurements'),
                             fuse_func=data_fuse_func,
                             skip_broken_datasets=True)

    mask_inplace = source_prod.spec.get('mask_inplace', False)
    mask_nodata = source_prod.spec.get('mask_nodata', True)

    if mask_nodata:
        data = sensible_mask_invalid_data(data)

    # if all NaN
    completely_empty = all(ds for ds in xarray.ufuncs.isnan(data).all().data_vars.values())
    if completely_empty:
        # Discard empty slice
        return None

    if 'masks' in source_prod.spec:
        for mask_spec, mask_tile in zip(source_prod.spec['masks'], source_prod.masks):
            if mask_tile is None:
                # Discard data due to no mask data
                return None
            mask_fuse_func = import_function(mask_spec['fuse_func']) if 'fuse_func' in mask_spec else None
            mask = GridWorkflow.load(mask_tile[sub_tile_slice],
                                     measurements=[mask_spec['measurement']],
                                     fuse_func=mask_fuse_func,
                                     skip_broken_datasets=True)[mask_spec['measurement']]
            if mask_spec.get('flags') is not None:
                mask = make_mask(mask, **mask_spec['flags'])
            elif mask_spec.get('less_than') is not None:
                less_than = float(mask_spec['less_than'])
                mask = mask < less_than
            elif mask_spec.get('greater_than') is not None:
                greater_than = float(mask_spec['greater_than'])
                mask = mask > greater_than

            if mask_inplace:
                data = sensible_where_inplace(data, mask)
            else:
                data = sensible_where(data, mask)
            del mask

    if source_prod.source_index is not None:
        data.coords['source'] = ('time', np.repeat(source_prod.source_index, data.time.size))

    return data
Exemplo n.º 4
0
    def load_slice(i):
        loc = [slice(i, i + 1), slice(None), slice(None)]
        d = GridWorkflow.load(tile[loc], **kwargs)

        if mask_nodata:
            d = sensible_mask_invalid_data(d)

        # Load all masks and combine them all into one
        mask = None
        for (m_tile, flags, load_args), invert in zip(masks, inverts):
            m = GridWorkflow.load(m_tile[loc], **load_args)
            m, *other = m.data_vars.values()
            # TODO make use of make_mask_from_spec here
            m = make_mask(m, **flags)

            if invert:
                m = np.logical_not(m)

            if mask is None:
                mask = m
            else:
                mask &= m

        if mask_inplace or not mask_nodata:
            where = sensible_where_inplace
        else:
            where = sensible_where

        if mask is not None:
            # Apply mask in place if asked or if we already performed
            # conversion to float32, this avoids reallocation of memory and
            # hence increases the largest data set size one can load without
            # running out of memory
            d = where(d, mask)

        if geom is not None:
            d = where(d, geometry_mask([geom], d.geobox, invert=True))

        if src_idx is not None:
            d.coords['source'] = ('time', np.repeat(src_idx, d.time.size))

        return d