Example #1
def _get_ts_dask(pt_1, pt_2, pt_3, pt_4, out_x, out_y):
    """Calculate vertical and horizontal fractional distances t and s"""

    # General case, ie. where the the corners form an irregular rectangle
    t__, s__ = _get_ts_irregular_dask(pt_1, pt_2, pt_3, pt_4, out_y, out_x)

    # Cases where verticals are parallel
    idxs = da.isnan(t__) | da.isnan(s__)
    # Remove extra dimensions
    idxs = da.ravel(idxs)

    if da.any(idxs):
        t_new, s_new = _get_ts_uprights_parallel_dask(pt_1, pt_2,
                                                      pt_3, pt_4,
                                                      out_y, out_x)

        t__ = da.where(idxs, t_new, t__)
        s__ = da.where(idxs, s_new, s__)

    # Cases where both verticals and horizontals are parallel
    idxs = da.isnan(t__) | da.isnan(s__)
    # Remove extra dimensions
    idxs = da.ravel(idxs)
    if da.any(idxs):
        t_new, s_new = _get_ts_parallellogram_dask(pt_1, pt_2, pt_3,
                                                   out_y, out_x)
        t__ = da.where(idxs, t_new, t__)
        s__ = da.where(idxs, s_new, s__)

    idxs = (t__ < 0) | (t__ > 1) | (s__ < 0) | (s__ > 1)
    t__ = da.where(idxs, np.nan, t__)
    s__ = da.where(idxs, np.nan, s__)

    return t__, s__
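Aside: the NaN-fallback pattern above (compute a general solution, then patch only the failed elements) can be sketched in isolation; the arrays below are made up for illustration.

import numpy as np
import dask.array as da

t__ = da.from_array(np.array([0.2, np.nan, 0.8]))   # partial result with failures
t_new = da.from_array(np.array([9.9, 0.5, 9.9]))    # fallback solution
t__ = da.where(da.isnan(t__), t_new, t__)           # patch only the NaN cases
assert np.allclose(t__.compute(), [0.2, 0.5, 0.8])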
Example #2
    def test_get_corner_dask(self):
        """Test finding the closest corners."""
        import dask.array as da
        from pyresample.bilinear.xarr import (_get_corner_dask,
                                              _get_input_xy_dask)
        from pyresample import CHUNK_SIZE
        from pyresample._spatial_mp import Proj

        proj = Proj(self.target_def.proj_str)
        in_x, in_y = _get_input_xy_dask(self.source_def, proj,
                                        da.from_array(self.valid_input_index),
                                        da.from_array(self.index_array))
        out_x, out_y = self.target_def.get_proj_coords(chunks=CHUNK_SIZE)
        out_x = da.ravel(out_x)
        out_y = da.ravel(out_y)

        # Some copy&paste from the code to get the input
        out_x_tile = np.reshape(np.tile(out_x, self.neighbours),
                                (self.neighbours, out_x.size)).T
        out_y_tile = np.reshape(np.tile(out_y, self.neighbours),
                                (self.neighbours, out_y.size)).T
        x_diff = out_x_tile - in_x
        y_diff = out_y_tile - in_y
        stride = np.arange(x_diff.shape[0])

        # Use lower left source pixels for testing
        valid = (x_diff > 0) & (y_diff > 0)
        x_3, y_3, idx_3 = _get_corner_dask(stride, valid, in_x, in_y,
                                           da.from_array(self.index_array))

        self.assertTrue(
            x_3.shape == y_3.shape == idx_3.shape == (self.target_def.size, ))
        # Four locations have no data to the lower left of them (the
        # bottom row of the area)
        self.assertEqual(np.sum(np.isnan(x_3.compute())), 4)
Example #3
def _get_input_xy_dask(source_geo_def, proj, input_idxs, idx_ref):
    """Get x/y coordinates for the input area and reduce the data."""
    in_lons, in_lats = source_geo_def.get_lonlats_dask()

    # Mask invalid values
    in_lons, in_lats = _mask_coordinates_dask(in_lons, in_lats)

    # Select valid locations
    # TODO: direct indexing w/o .compute() results in
    # "ValueError: object too deep for desired array"

    in_lons = da.ravel(in_lons)
    in_lons = in_lons.compute()
    in_lons = in_lons[input_idxs]
    in_lats = da.ravel(in_lats)
    in_lats = in_lats.compute()
    in_lats = in_lats[input_idxs]

    # Expand input coordinates for each output location
    in_lons = in_lons[idx_ref]
    in_lats = in_lats[idx_ref]

    # Convert coordinates to output projection x/y space
    in_x, in_y = proj(in_lons, in_lats)

    return in_x, in_y
Example #4
    def test_get_bounding_corners_dask(self):
        """Test finding surrounding bounding corners."""
        import dask.array as da
        from pyresample.bilinear.xarr import (_get_input_xy_dask,
                                              _get_bounding_corners_dask)
        from pyresample._spatial_mp import Proj
        from pyresample import CHUNK_SIZE

        proj = Proj(self.target_def.proj_str)
        out_x, out_y = self.target_def.get_proj_coords(chunks=CHUNK_SIZE)
        out_x = da.ravel(out_x)
        out_y = da.ravel(out_y)
        in_x, in_y = _get_input_xy_dask(self.source_def, proj,
                                        da.from_array(self.valid_input_index),
                                        da.from_array(self.index_array))
        pt_1, pt_2, pt_3, pt_4, ia_ = _get_bounding_corners_dask(
            in_x, in_y, out_x, out_y, self.neighbours,
            da.from_array(self.index_array))

        self.assertTrue(pt_1.shape == pt_2.shape == pt_3.shape == pt_4.shape ==
                        (self.target_def.size, 2))
        self.assertTrue(ia_.shape == (self.target_def.size, 4))

        # Check which of the locations has four valid X/Y pairs by
        # finding where there are non-NaN values
        res = da.sum(pt_1 + pt_2 + pt_3 + pt_4, axis=1).compute()
        self.assertEqual(np.sum(~np.isnan(res)), 10)
Example #5
def _get_raveled_lonlats(geo_def):
    lons, lats = geo_def.get_lonlats(chunks=CHUNK_SIZE)
    if lons.size == 0 or lats.size == 0:
        raise ValueError('Cannot resample empty data set')
    elif lons.size != lats.size or lons.shape != lats.shape:
        raise ValueError('Mismatch between lons and lats')

    return da.ravel(lons), da.ravel(lats)
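For reference, a minimal sketch (not from pyresample) of what da.ravel does: it lazily flattens an N-d dask array into 1-d in C order, mirroring np.ravel.

import numpy as np
import dask.array as da

lons = da.from_array(np.arange(12.0).reshape(3, 4), chunks=(2, 2))
flat = da.ravel(lons)                    # still lazy; no data is touched yet
assert flat.shape == (12,)
assert np.array_equal(flat.compute(), np.arange(12.0))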
Example #6
def test_ravel():
    x = np.random.randint(10, size=(4, 6))

    # 2d
    # these should use the shortcut
    for chunks in [(4, 6), (2, 6)]:
        a = from_array(x, chunks=chunks)
        assert eq(x.ravel(), a.ravel())
        assert len(a.ravel().dask) == len(a.dask) + len(a.chunks[0])
    # these cannot
    for chunks in [(4, 2), (2, 2)]:
        a = from_array(x, chunks=chunks)
        assert eq(x.ravel(), a.ravel())
        assert len(a.ravel().dask) > len(a.dask) + len(a.chunks[0])

    # 0d
    assert eq(x[0, 0].ravel(), a[0, 0].ravel())

    # 1d
    a_flat = a.ravel()
    assert a_flat.ravel() is a_flat

    # 3d
    x = np.random.randint(10, size=(2, 3, 4))
    for chunks in [2, 4, (2, 3, 2), (1, 3, 4)]:
        a = from_array(x, chunks=chunks)
        assert eq(x.ravel(), a.ravel())

    assert eq(x.flatten(), a.flatten())
    assert eq(np.ravel(x), da.ravel(a))
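The graph-length assertions above encode an implementation detail: when every chunk spans whole rows, ravel can reuse the existing blocks; otherwise dask must move data between chunks first. A rough illustration of the two cases, assuming current dask semantics:

import numpy as np
import dask.array as da

x = np.arange(24).reshape(4, 6)
cheap = da.from_array(x, chunks=(2, 6)).ravel()    # row-contiguous blocks, no data movement
costly = da.from_array(x, chunks=(2, 2)).ravel()   # blocks must be recombined first
assert np.array_equal(cheap.compute(), costly.compute())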
Example #7
def query_no_distance(target_lons, target_lats, valid_output_index, kdtree,
                      neighbours, epsilon, radius):
    """Query the kdtree. No distances are returned."""
    voi = valid_output_index
    shape = voi.shape
    voir = da.ravel(voi)
    target_lons_valid = da.ravel(target_lons)[voir]
    target_lats_valid = da.ravel(target_lats)[voir]

    coords = lonlat2xyz(target_lons_valid, target_lats_valid)
    distance_array, index_array = kdtree.query(coords.compute(),
                                               k=neighbours,
                                               eps=epsilon,
                                               distance_upper_bound=radius)

    return index_array
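The voir masking above is just 1-d boolean indexing of a raveled dask array; a self-contained sketch with made-up values:

import numpy as np
import dask.array as da

vals = da.from_array(np.array([[1.0, 2.0], [3.0, 4.0]]))
mask = da.from_array(np.array([[True, False], [False, True]]))
picked = da.ravel(vals)[da.ravel(mask)]   # lazy boolean selection
assert np.array_equal(picked.compute(), [1.0, 4.0])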
Example #8
    def _create_resample_kdtree(self):
        """Set up kd tree on input"""
        # Get input information
        valid_input_index, source_lons, source_lats = \
            _get_valid_input_index_dask(self.source_geo_def,
                                        self.target_geo_def,
                                        self.reduce_data,
                                        self.radius_of_influence,
                                        nprocs=self.nprocs)

        # FIXME: Is dask smart enough to only compute the pixels we end up
        #        using even with this complicated indexing?
        input_coords = lonlat2xyz(source_lons, source_lats)
        valid_input_index = da.ravel(valid_input_index)
        input_coords = input_coords[valid_input_index, :]
        input_coords = input_coords.compute()
        # Build kd-tree on input
        input_coords = input_coords.astype(np.float64)
        valid_input_index, input_coords = da.compute(valid_input_index,
                                                     input_coords)
        if kd_tree_name == 'pykdtree':
            resample_kdtree = KDTree(input_coords)
        else:
            resample_kdtree = sp.cKDTree(input_coords)

        return valid_input_index, resample_kdtree
Example #9
def _get_valid_input_index_dask(source_geo_def,
                                target_geo_def,
                                reduce_data,
                                radius_of_influence,
                                nprocs=1):
    """Find indices of reduced inputput data"""

    source_lons, source_lats = source_geo_def.get_lonlats_dask()
    source_lons = da.ravel(source_lons)
    source_lats = da.ravel(source_lats)

    if source_lons.size == 0 or source_lats.size == 0:
        raise ValueError('Cannot resample empty data set')
    elif source_lons.size != source_lats.size or \
            source_lons.shape != source_lats.shape:
        raise ValueError('Mismatch between lons and lats')

    # Remove illegal values
    valid_input_index = ((source_lons >= -180) & (source_lons <= 180) &
                         (source_lats <= 90) & (source_lats >= -90))

    if reduce_data:
        # Reduce dataset
        if (isinstance(source_geo_def, geometry.CoordinateDefinition) and
            isinstance(target_geo_def, (geometry.GridDefinition,
                                        geometry.AreaDefinition))) or \
           (isinstance(source_geo_def, (geometry.GridDefinition,
                                        geometry.AreaDefinition)) and
            isinstance(target_geo_def, (geometry.GridDefinition,
                                        geometry.AreaDefinition))):
            # Resampling from swath to grid or from grid to grid
            lonlat_boundary = target_geo_def.get_boundary_lonlats()

            # Combine reduced and legal values
            valid_input_index &= \
                data_reduce.get_valid_index_from_lonlat_boundaries(
                    lonlat_boundary[0],
                    lonlat_boundary[1],
                    source_lons, source_lats,
                    radius_of_influence)

    if isinstance(valid_input_index, np.ma.core.MaskedArray):
        # Make sure valid_input_index is not a masked array
        valid_input_index = valid_input_index.filled(False)

    return valid_input_index, source_lons, source_lats
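The longitude/latitude validity test used above, shown in isolation with hypothetical coordinates:

import numpy as np
import dask.array as da

source_lons = da.from_array(np.array([0.0, 200.0, -10.0]))
source_lats = da.from_array(np.array([45.0, 10.0, 95.0]))
valid_input_index = ((source_lons >= -180) & (source_lons <= 180) &
                     (source_lats <= 90) & (source_lats >= -90))
assert np.array_equal(valid_input_index.compute(), [True, False, False])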
Example #10
def _get_output_xy_dask(target_geo_def, proj):
    """Get x/y coordinates of the target grid."""
    # Read output coordinates
    out_lons, out_lats = target_geo_def.get_lonlats_dask()

    # Mask invalid coordinates
    out_lons, out_lats = _mask_coordinates_dask(out_lons, out_lats)

    # Convert coordinates to output projection x/y space
    res = da.dstack(proj(out_lons.compute(), out_lats.compute()))
    out_x = da.ravel(res[:, :, 0])
    out_y = da.ravel(res[:, :, 1])

    return out_x, out_y
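The proj(...) call above is a pyproj-style forward transform; a hedged sketch with a hypothetical projection (note that because of the .compute() calls, the transform itself runs on NumPy arrays, not dask arrays):

import numpy as np
from pyproj import Proj

proj = Proj(proj='laea', lat_0=60, lon_0=10)   # hypothetical target projection
out_x, out_y = proj(np.array([10.0, 11.0]), np.array([60.0, 61.0]))
assert out_x.shape == out_y.shape == (2,)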
Example #11
def _check_data_shape_dask(data, input_idxs):
    """Check data shape and adjust if necessary."""
    # Handle multiple datasets
    if data.ndim > 2 and data.shape[0] * data.shape[1] == input_idxs.shape[0]:
        data = da.reshape(data,
                          (data.shape[0] * data.shape[1], data.shape[2]))
    # Also ravel single dataset
    elif data.shape[0] != input_idxs.size:
        data = da.ravel(data)

    # Ensure two dimensions
    if data.ndim == 1:
        data = da.reshape(data, (data.size, 1))

    return data
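A quick usage sketch for the helper above (hypothetical inputs; assumes _check_data_shape_dask is in scope): a 2-d array whose first axis does not match the index size is raveled, then promoted back to 2-d.

import numpy as np
import dask.array as da

data = da.from_array(np.arange(6.0).reshape(2, 3))
input_idxs = da.from_array(np.ones(6, dtype=bool))
out = _check_data_shape_dask(data, input_idxs)   # ravel, then reshape to (size, 1)
assert out.shape == (6, 1)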
Example #12
def _contains_cftime_datetimes(array):
    """Check if an array contains cftime.datetime objects"""
    try:
        from cftime import datetime as cftime_datetime
    except ImportError:
        return False
    else:
        if array.dtype == np.dtype('O') and array.size > 0:
            sample = array.ravel()[0]
            if isinstance(sample, dask_array_type):
                sample = sample.compute()
                if isinstance(sample, np.ndarray):
                    sample = sample.item()
            return isinstance(sample, cftime_datetime)
        else:
            return False
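Usage sketch for the check above; cftime is an optional dependency, so the example is guarded, and it assumes _contains_cftime_datetimes is in scope.

import numpy as np

try:
    import cftime
    arr = np.array([cftime.DatetimeNoLeap(2000, 1, 1)], dtype=object)
    assert _contains_cftime_datetimes(arr)   # DatetimeNoLeap subclasses cftime.datetime
except ImportError:
    pass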
Example #13
def anomalies(cube, period):
    """
    Compute anomalies using a mean with the specified granularity.

    Computes anomalies based on daily, monthly, seasonal or yearly means for
    the full available period

    Parameters
    ----------
    cube: iris.cube.Cube
        input cube.

    period: str
        Period to compute the statistic over.
        Available periods: 'full', 'season', 'seasonal', 'monthly', 'month',
        'mon', 'daily', 'day'

    Returns
    -------
    iris.cube.Cube
        Anomalies cube
    """
    reference = climate_statistics(cube, period=period)
    if period in ['full']:
        return cube - reference

    cube_coord = _get_period_coord(cube, period)
    ref_coord = _get_period_coord(reference, period)

    data = cube.core_data()
    cube_time = cube.coord('time')
    ref = {}
    for ref_slice in reference.slices_over(ref_coord):
        ref[ref_slice.coord(ref_coord).points[0]] = da.ravel(
            ref_slice.core_data())
    cube_coord_dim = cube.coord_dims(cube_coord)[0]
    for i in range(cube_time.shape[0]):
        time = cube_time.points[i]
        indexes = cube_time.points == time
        indexes = iris.util.broadcast_to_shape(indexes, data.shape,
                                               (cube_coord_dim, ))
        data[indexes] = data[indexes] - ref[cube_coord.points[i]]

    cube = cube.copy(data)
    return cube
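Stripped of iris, the arithmetic in the loop above is just subtracting a per-period climatology; a hedged sketch with a made-up series and period coordinate:

import numpy as np
import dask.array as da

data = np.arange(12.0)                 # hypothetical time series
seasons = np.tile(np.arange(4), 3)     # hypothetical period coordinate
clim = np.array([data[seasons == s].mean() for s in range(4)])
anom = da.from_array(data) - da.from_array(clim[seasons])
assert abs(float(anom.mean().compute())) < 1e-12   # anomalies average to zero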
Example #14
def coords_to_point_array(coords: List[Any]) -> np.ndarray:
    """Re-arrange data from a list of xarray coordinates into a 2-d array of shape
    (npoints, ncoords).

    """
    c_chunks = [c.chunks for c in coords]

    if any(chunks is None for chunks in c_chunks):
        # plain numpy arrays (maybe triggers compute)
        X = np.stack([np.ravel(c) for c in coords]).T

    else:
        import dask.array as da

        # TODO: check chunks are equal for all coords?

        X = da.stack([da.ravel(c.data) for c in coords]).T
        X = X.rechunk((X.chunks[0], len(coords)))

    return X
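Usage sketch for the function above, with hypothetical 2-d coordinates (numpy-backed, so the plain-NumPy branch is taken):

import numpy as np
import xarray as xr

lat = xr.DataArray([[10.0, 10.0], [20.0, 20.0]], dims=('y', 'x'))
lon = xr.DataArray([[100.0, 110.0], [100.0, 110.0]], dims=('y', 'x'))
X = coords_to_point_array([lat, lon])   # assumes the function is in scope
assert X.shape == (4, 2)                # (npoints, ncoords)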
Example #15
    def _create_resample_kdtree(self):
        """Set up kd tree on input"""
        # Get input information
        valid_input_index, source_lons, source_lats = \
            _get_valid_input_index_dask(self.source_geo_def,
                                        self.target_geo_def,
                                        self.reduce_data,
                                        self.radius_of_influence,
                                        nprocs=self.nprocs)

        # FIXME: Is dask smart enough to only compute the pixels we end up
        #        using even with this complicated indexing?
        input_coords = lonlat2xyz(source_lons, source_lats)
        valid_input_index = da.ravel(valid_input_index)
        input_coords = input_coords[valid_input_index, :]
        input_coords = input_coords.compute()
        # Build kd-tree on input
        input_coords = input_coords.astype(np.float64)
        valid_input_index, input_coords = da.compute(valid_input_index,
                                                     input_coords)
        return valid_input_index, KDTree(input_coords)
Example #16
def test_ravel():
    x = np.random.randint(10, size=(4, 6))

    # 2d
    for chunks in [(4, 6), (2, 6)]:
        a = da.from_array(x, chunks=chunks)
        assert_eq(x.ravel(), a.ravel())
        assert len(a.ravel().dask) == len(a.dask) + len(a.chunks[0])

    # 0d
    assert_eq(x[0, 0].ravel(), a[0, 0].ravel())

    # 1d
    a_flat = a.ravel()
    assert_eq(a_flat.ravel(), a_flat)

    # 3d
    x = np.random.randint(10, size=(2, 3, 4))
    for chunks in [4, (1, 3, 4)]:
        a = da.from_array(x, chunks=chunks)
        assert_eq(x.ravel(), a.ravel())

    assert_eq(x.flatten(), a.flatten())
    assert_eq(np.ravel(x), da.ravel(a))
Example #17
def main(argv=None):

    K.set_floatx('float32')

    chunk_size = FLAGS.chunk_size

    # Read data set
    hdf5_file = h5py.File(FLAGS.data_file, 'r')
    images, labels, _ = hdf52dask(hdf5_file,
                                  FLAGS.group,
                                  chunk_size,
                                  shuffle=FLAGS.shuffle,
                                  seed=FLAGS.seed,
                                  pct=FLAGS.pct)
    n_images = images.shape[0]
    n_batches = int(np.ceil(n_images / float(FLAGS.batch_size)))

    # Data augmentation parameters
    daug_params_file = get_daug_scheme_path(FLAGS.daug_params, FLAGS.data_file)
    daug_params = yaml.load(open(daug_params_file, 'r'),
                            Loader=yaml.FullLoader)
    nodaug_params_file = get_daug_scheme_path('nodaug.yml', FLAGS.data_file)
    nodaug_params = yaml.load(open(nodaug_params_file, 'r'),
                              Loader=yaml.FullLoader)

    # Initialize the network model
    model_filename = FLAGS.model
    model = load_model(model_filename)

    # Print the model summary
    model.summary()

    # Get relevant layers
    if FLAGS.store_input:
        layer_regex = '({}|.*input.*)'.format(FLAGS.layer_regex)
    else:
        layer_regex = FLAGS.layer_regex

    layers = [
        layer.name for layer in model.layers
        if re.compile(layer_regex).match(layer.name)
    ]

    # Create batch generators
    n_daug_rep = FLAGS.n_daug_rep
    n_diff_per_batch = int(FLAGS.batch_size / n_daug_rep)
    image_gen_daug = get_generator(images, **daug_params)
    batch_gen_daug = batch_generator(image_gen_daug,
                                     images,
                                     labels,
                                     batch_size=n_diff_per_batch,
                                     aug_per_im=n_daug_rep,
                                     shuffle=False)
    image_gen_nodaug = get_generator(images, **nodaug_params)
    batch_gen_nodaug = batch_generator(image_gen_nodaug,
                                       images,
                                       labels,
                                       FLAGS.batch_size,
                                       aug_per_im=1,
                                       shuffle=False)

    # Outputs
    if FLAGS.output_dir == '-1':
        FLAGS.output_dir = os.path.dirname(FLAGS.model)

    output_hdf5 = h5py.File(
        os.path.join(FLAGS.output_dir, FLAGS.output_mse_matrix_hdf5), 'w')
    output_pickle = os.path.join(FLAGS.output_dir, FLAGS.output_pickle)
    df_init_idx = 0
    df = pd.DataFrame()

    # Iterate over the layers
    for layer_idx, layer_name in enumerate(layers):

        # Reload the model
        if layer_idx > 0:
            K.clear_session()
            model = load_model(model_filename)

        layer = model.get_layer(layer_name)

        # Rename input layer
        if re.compile('.*input.*').match(layer_name):
            layer_name = 'input'

        hdf5_layer = output_hdf5.create_group(layer_name)

        activation_function = K.function(
            [model.input, K.learning_phase()], [layer.output])

        print('\nComputing pairwise similarity at layer {}'.format(layer_name))

        # Compute activations of original data (without augmentation)
        a_nodaug_da = get_activations(activation_function, batch_gen_nodaug)
        a_nodaug_da = da.squeeze(a_nodaug_da)
        a_nodaug_da = da.rechunk(a_nodaug_da,
                                 (chunk_size, ) + (a_nodaug_da.shape[1:]))
        dim_activations = a_nodaug_da.shape[1]

        # Compute matrix of pairwise similarities
        r = da.reshape(da.sum(da.square(a_nodaug_da), axis=1), (-1, 1))
        mse_matrix = (r - 2 * da.dot(a_nodaug_da,
                                     da.transpose(a_nodaug_da))
                      + da.transpose(r)) / dim_activations

        # Compute activations with augmentation
        a_daug_da = get_activations(activation_function, batch_gen_daug)
        a_daug_da = da.rechunk(a_daug_da, (chunk_size, dim_activations, 1))

        # Compute similarity of augmentations with respect to the
        # activations of the original data
        a_nodaug_da = da.repeat(da.reshape(a_nodaug_da,
                                           a_nodaug_da.shape + (1, )),
                                repeats=n_daug_rep,
                                axis=2)
        a_nodaug_da = da.rechunk(a_nodaug_da, (chunk_size, dim_activations, 1))
        mse_daug = da.mean(da.square(a_nodaug_da - a_daug_da), axis=1)

        # Compute invariance score
        mse_sum = da.repeat(da.reshape(da.sum(mse_matrix, axis=1),
                                       (n_images, 1)),
                            repeats=n_daug_rep,
                            axis=1)
        mse_sum = da.rechunk(mse_sum, (chunk_size, 1))
        invariance = 1 - n_images * da.divide(mse_daug, mse_sum)

        print('Dimensionality activations: {}x{}x{}'.format(
            n_images, dim_activations, n_daug_rep))

        # Store HDF5 file
        if FLAGS.output_mse_matrix_hdf5:
            mse_matrix_ds = hdf5_layer.create_dataset(
                'mse_matrix',
                shape=mse_matrix.shape,
                chunks=mse_matrix.chunksize,
                dtype=K.floatx())
            mse_daug_ds = hdf5_layer.create_dataset('mse_daug',
                                                    shape=mse_daug.shape,
                                                    chunks=mse_daug.chunksize,
                                                    dtype=K.floatx())
            invariance_ds = hdf5_layer.create_dataset(
                'invariance',
                shape=invariance.shape,
                chunks=invariance.chunksize,
                dtype=K.floatx())
            time_init = time()
            with ProgressBar(dt=1):
                da.store([mse_matrix, mse_daug, invariance],
                         [mse_matrix_ds, mse_daug_ds, invariance_ds])
            time_end = time()
            print('Elapsed time: {}'.format(time_end - time_init))

            invariance = np.ravel(
                np.asarray(output_hdf5[layer_name]['invariance']))
        else:
            time_init = time()
            invariance = da.ravel(invariance).compute()
            time_end = time()
            print('Elapsed time: {}'.format(time_end - time_init))

        # Update pandas data frame for plotting
        df_end_idx = df_init_idx + n_images * n_daug_rep
        d = pd.DataFrame(
            {
                'Layer': layer_name,
                'sample': np.repeat(np.arange(n_images), n_daug_rep),
                'n_daug': np.tile(np.arange(n_daug_rep), n_images),
                'invariance': invariance
            },
            index=np.arange(df_init_idx, df_end_idx).tolist())
        df = pd.concat([df, d])
        df_init_idx = df_end_idx

    pickle.dump(df, open(output_pickle, 'wb'))
    output_hdf5.close()
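The mse_matrix expression in the loop above relies on the standard pairwise-distance identity ||a - b||^2 = ||a||^2 - 2*a.b + ||b||^2; a quick NumPy sanity check against the brute-force computation:

import numpy as np

A = np.random.rand(5, 3)
r = np.sum(np.square(A), axis=1).reshape(-1, 1)
mse = (r - 2 * A @ A.T + r.T) / A.shape[1]
brute = np.mean((A[:, None, :] - A[None, :, :]) ** 2, axis=2)
assert np.allclose(mse, brute)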
Example #18
    def get_bil_info(self):
        """Return neighbour info.

        Returns
        -------
        t__ : dask array
            Vertical fractional distances from corner to the new points
        s__ : dask array
            Horizontal fractional distances from corner to the new points
        slices : dask arrays
            Source data slices for the target grid
        mask_slices : dask arrays
            Masks of invalid values for the slices
        out_coords : dask arrays
            Coordinates of the output grid

        """
        if self.source_geo_def.size < self.neighbours:
            warnings.warn('Searching for %s neighbours in %s data points' %
                          (self.neighbours, self.source_geo_def.size))

        # Create kd-tree
        valid_input_index, resample_kdtree = self._create_resample_kdtree()
        # This is a numpy array
        self.valid_input_index = valid_input_index

        if resample_kdtree.n == 0:
            # Handle if all input data is reduced away
            bilinear_t, bilinear_s, valid_input_index, index_array = \
                _create_empty_bil_info(self.source_geo_def,
                                       self.target_geo_def)
            self.bilinear_t = bilinear_t
            self.bilinear_s = bilinear_s
            self.valid_input_index = valid_input_index
            self.index_array = index_array

            return bilinear_t, bilinear_s, valid_input_index, index_array

        target_lons, target_lats = self.target_geo_def.get_lonlats()
        valid_output_idx = ((target_lons >= -180) & (target_lons <= 180) &
                            (target_lats <= 90) & (target_lats >= -90))

        index_array, distance_array = self._query_resample_kdtree(
            resample_kdtree, target_lons, target_lats, valid_output_idx)

        # Reduce index reference
        input_size = da.sum(self.valid_input_index)
        index_mask = index_array == input_size
        index_array = da.where(index_mask, 0, index_array)

        # Get output projection as pyproj object
        proj = Proj(self.target_geo_def.proj_str)

        # Get output x/y coordinates
        out_x, out_y = self.target_geo_def.get_proj_coords(chunks=CHUNK_SIZE)
        out_x = da.ravel(out_x)
        out_y = da.ravel(out_y)

        # Get input x/y coordinates
        in_x, in_y = _get_input_xy_dask(self.source_geo_def, proj,
                                        self.valid_input_index, index_array)

        # Get the four closest corner points around each output location
        pt_1, pt_2, pt_3, pt_4, index_array = \
            _get_bounding_corners_dask(in_x, in_y, out_x, out_y,
                                       self.neighbours, index_array)

        # Calculate vertical and horizontal fractional distances t and s
        t__, s__ = _get_ts_dask(pt_1, pt_2, pt_3, pt_4, out_x, out_y)
        self.bilinear_t, self.bilinear_s = t__, s__

        self.valid_output_index = valid_output_idx
        self.index_array = index_array
        self.distance_array = distance_array

        self._get_slices()

        return (self.bilinear_t, self.bilinear_s,
                self.slices, self.mask_slices,
                self.out_coords)
Example #19
        # Read 'Latitude' and 'Longitude' and append them to the running
        # dask arrays
        latitude = xr.open_dataset(MOD03_file, drop_variables=var_list)['Latitude'][:, :].values
        lat = da.concatenate((lat, latitude), axis=0)

        longitude = xr.open_dataset(MOD03_file, drop_variables=var_list)['Longitude'][:, :].values
        lon = da.concatenate((lon, longitude), axis=0)

    print('Longitude Shape Is: ', lon.shape)
    print('Latitude Shape Is: ', lat.shape)

    cm = da.ravel(cm)
    lat = da.ravel(lat)
    lon = da.ravel(lon)

    lon = lon.astype(int)
    lat = lat.astype(int)
    cm = cm.astype(int)

    Lat = lat.to_dask_dataframe()
    Lon = lon.to_dask_dataframe()
    CM = cm.to_dask_dataframe()

    df = dd.concat([Lat, Lon, CM], axis=1, interleave_partitions=False)

    cols = {0: 'Latitude', 1: 'Longitude', 2: 'CM'}
    df = df.rename(columns=cols)
Example #20
def aggregateOneFileData(M06_file, M03_file):
    print("aggregateOneFileData function with M06_file and M03_file:" +
          M06_file + ", " + M03_file)

    var_list = [
        'Scan Offset', 'Track Offset', 'Height Offset', 'Height',
        'SensorZenith', 'Range', 'SolarZenith', 'SolarAzimuth', 'Land/SeaMask',
        'WaterPresent', 'gflags', 'Scan number', 'EV frames', 'Scan Type',
        'EV start time', 'SD start time', 'SV start time', 'EV center time',
        'Mirror side', 'SD Sun zenith', 'SD Sun azimuth', 'Moon Vector',
        'orb_pos', 'orb_vel', 'T_inst2ECR', 'attitude_angles', 'sun_ref',
        'impulse_enc', 'impulse_time', 'thermal_correction', 'SensorAzimuth'
    ]

    b1 = []
    cm = np.zeros((2030, 1354), dtype=np.float32)
    lat = np.zeros((2030, 1354), dtype=np.float32)
    lon = np.zeros((2030, 1354), dtype=np.float32)

    d06 = xr.open_dataset(
        M06_file, drop_variables="Scan Type")['Cloud_Mask_1km'][:, :, 0].values
    # Decode the cloud-mask flag from bits 1-2 of each byte
    ds06_decoded = (np.array(d06, dtype='byte') & 0b00000110) >> 1
    CM = np.array(ds06_decoded).byteswap().newbyteorder()

    cm = da.concatenate((cm, CM), axis=0)

    cm = da.ravel(cm)

    latitude = xr.open_dataset(
        M03_file, drop_variables=var_list)['Latitude'][:, :].values
    longitude = xr.open_dataset(
        M03_file, drop_variables=var_list)['Longitude'][:, :].values
    lat = da.concatenate((lat, latitude), axis=0)
    lon = da.concatenate((lon, longitude), axis=0)
    #print("lat shape after con:",lat.shape)
    #print("lon shape after con:",lon.shape)

    lat = da.ravel(lat)
    lon = da.ravel(lon)

    #print("lat shape after ravel:",lat.shape)
    #print("lon shape after ravel:",lon.shape)
    cm = cm.astype(int)
    lon = lon.astype(int)
    lat = lat.astype(int)
    lat = lat + 90
    lon = lon + 180
    Lat = lat.to_dask_dataframe()
    Lon = lon.to_dask_dataframe()
    CM = cm.to_dask_dataframe()
    df = dd.concat([Lat, Lon, CM], axis=1, interleave_partitions=False)

    cols = {0: 'Latitude', 1: 'Longitude', 2: 'CM'}
    df = df.rename(columns=cols)

    df2 = (df.groupby(['Longitude',
                       'Latitude']).CM.apply(countzero).reset_index())
    print(df2)
    b1.append(df2)
    print("printing b1:")
    print(b1)
    return b1
Example #21
    def get_sample_from_bil_info(self, data, fill_value=np.nan,
                                 output_shape=None):
        if fill_value is None:
            fill_value = np.nan
        # FIXME: can this be made into a dask construct?
        cols, lines = np.meshgrid(np.arange(data['x'].size),
                                  np.arange(data['y'].size))
        cols = da.ravel(cols)
        lines = da.ravel(lines)
        try:
            self.valid_input_index = self.valid_input_index.compute()
        except AttributeError:
            pass
        vii = self.valid_input_index.squeeze()
        try:
            self.index_array = self.index_array.compute()
        except AttributeError:
            pass

        # ia contains reduced (valid) indices of the source array, and has the
        # shape of the destination array
        ia = self.index_array
        rlines = lines[vii][ia]
        rcols = cols[vii][ia]

        slices = []
        mask_slices = []
        mask_2d_added = False
        coords = {}
        try:
            # FIXME: Use same chunk size as input data
            coord_x, coord_y = self.target_geo_def.get_proj_vectors_dask()
        except AttributeError:
            coord_x, coord_y = None, None

        for _, dim in enumerate(data.dims):
            if dim == 'y':
                slices.append(rlines)
                if not mask_2d_added:
                    mask_slices.append(ia >= self.target_geo_def.size)
                    mask_2d_added = True
                if coord_y is not None:
                    coords[dim] = coord_y
            elif dim == 'x':
                slices.append(rcols)
                if not mask_2d_added:
                    mask_slices.append(ia >= self.target_geo_def.size)
                    mask_2d_added = True
                if coord_x is not None:
                    coords[dim] = coord_x
            else:
                slices.append(slice(None))
                mask_slices.append(slice(None))
                try:
                    coords[dim] = data.coords[dim]
                except KeyError:
                    pass

        res = data.values[tuple(slices)]
        res[tuple(mask_slices)] = fill_value

        try:
            p_1 = res[:, :, 0]
            p_2 = res[:, :, 1]
            p_3 = res[:, :, 2]
            p_4 = res[:, :, 3]
        except IndexError:
            p_1 = res[:, 0]
            p_2 = res[:, 1]
            p_3 = res[:, 2]
            p_4 = res[:, 3]

        s__, t__ = self.bilinear_s, self.bilinear_t

        res = (p_1 * (1 - s__) * (1 - t__) +
               p_2 * s__ * (1 - t__) +
               p_3 * (1 - s__) * t__ +
               p_4 * s__ * t__)

        epsilon = 1e-6
        data_min = da.nanmin(data) - epsilon
        data_max = da.nanmax(data) + epsilon

        idxs = (res > data_max) | (res < data_min)
        res = da.where(idxs, fill_value, res)
        shp = self.target_geo_def.shape
        if data.ndim == 3:
            res = da.reshape(res, (res.shape[0], shp[0], shp[1]))
        else:
            res = da.reshape(res, (shp[0], shp[1]))
        res = DataArray(da.from_array(res, chunks=CHUNK_SIZE),
                        dims=data.dims, coords=coords)

        return res
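The weighting at the heart of the method above is plain bilinear interpolation; with hypothetical corner values and midpoint fractional distances:

p_1, p_2, p_3, p_4 = 0.0, 1.0, 2.0, 3.0   # corner values
s__, t__ = 0.5, 0.5                       # fractional distances at the cell midpoint
res = (p_1 * (1 - s__) * (1 - t__) +
       p_2 * s__ * (1 - t__) +
       p_3 * (1 - s__) * t__ +
       p_4 * s__ * t__)
assert res == 1.5                         # the midpoint is the mean of the corners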