Пример #1
0
    def test_read_write(self):
        """
        Write a data set with three variables and verify that reading the
        file back yields the same values for each of them.
        """
        cube = np.ones((100, 100, 100), dtype=np.float32)
        dim_names = ['time', 'y', 'x']
        sig_attrs = {'unit': 'dB'}
        angle_attrs = {'unit': 'degree', 'fill_value': -9999}

        data_vars = {
            'sig': (dim_names, cube, sig_attrs),
            'inc': (dim_names, cube, angle_attrs),
            'azi': (dim_names, cube, angle_attrs)
        }
        time_index = pd.date_range('2000-01-01', periods=cube.shape[0])
        self.ds = xr.Dataset(data_vars, coords={'time': time_index})

        with NcFile(self.filename, mode='w') as nc:
            nc.write(self.ds)

        with NcFile(self.filename) as nc:
            ds = nc.read()
            # every variable must survive the round trip unchanged
            for var_name in ('sig', 'inc', 'azi'):
                np.testing.assert_array_equal(ds[var_name][:],
                                              self.ds[var_name][:])
Пример #2
0
    def test_append(self):
        """
        Write the same data set three times (once in write mode, twice in
        append mode) and check that the file content equals the data
        repeated three times along the first axis.
        """
        cube = np.ones((100, 100, 100), dtype=np.float32)
        dim_names = ['time', 'y', 'x']
        time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

        self.ds = xr.Dataset(
            {
                'sig': (dim_names, cube),
                'inc': (dim_names, cube)
            },
            coords={'time': time_index})

        # the first pass creates the file, the two following passes append
        for file_mode in ('w', 'a', 'a'):
            with NcFile(self.filename, mode=file_mode) as nc:
                nc.write(self.ds)

        with NcFile(self.filename) as nc:
            ds = nc.read()

            expected = np.repeat(self.ds['sig'][:], 3, axis=0)
            np.testing.assert_array_equal(ds['sig'][:], expected)
Пример #3
0
    def test_geotransform(self):
        """
        Write a data set together with a geotransform and verify the x and y
        coordinates returned when reading the file.
        """
        xdim, ydim = 100, 200
        cube = np.ones((100, ydim, xdim), dtype=np.float32)
        dim_names = ['time', 'y', 'x']
        time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

        self.ds = xr.Dataset(
            {
                'sig': (dim_names, cube),
                'inc': (dim_names, cube)
            },
            coords={'time': time_index})

        geotrans = (3000000.0, 500.0, 0.0, 1800000.0, 0.0, -500.0)
        with NcFile(self.filename, mode='w', geotrans=geotrans) as nc:
            nc.write(self.ds)

        with NcFile(self.filename) as nc:
            ds = nc.read()

        # expected coordinates are evaluated at half-pixel offsets
        x_steps = 0.5 + np.arange(xdim)
        y_steps = 0.5 + np.arange(ydim)
        expected_x = geotrans[0] + x_steps * geotrans[1] + \
            x_steps * geotrans[2]
        expected_y = geotrans[3] + y_steps * geotrans[4] + \
            y_steps * geotrans[5]

        np.testing.assert_array_equal(ds['x'].values, expected_x)
        np.testing.assert_array_equal(ds['y'].values, expected_y)
Пример #4
0
    def test_chunk_cache(self):
        """
        Check that the variable chunk cache settings handed to NcFile are
        reported back by the 'sig' variable, both after writing and when the
        file is opened in 'r_netcdf' mode.
        """
        cube = np.ones((100, 100, 100), dtype=np.float32)
        dim_names = ['time', 'y', 'x']
        time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

        self.ds = xr.Dataset(
            {
                'sig': (dim_names, cube),
                'inc': (dim_names, cube)
            },
            coords={'time': time_index})

        # (size, nelems, preemption)
        var_chunk_cache = (1024 * 64, 500, 0.75)

        with NcFile(self.filename, mode='w',
                    var_chunk_cache=var_chunk_cache) as nc:
            nc.write(self.ds)
            reported = nc.src['sig'].get_var_chunk_cache()
            self.assertEqual(var_chunk_cache, reported)

        with NcFile(self.filename,
                    mode='r_netcdf',
                    var_chunk_cache=var_chunk_cache) as nc:
            reported = nc.src['sig'].get_var_chunk_cache()
            self.assertEqual(var_chunk_cache, reported)
Пример #5
0
    def test_auto_decoding(self):
        """
        Verify reading with and without automatic decoding.

        With `auto_decode=False` the values must equal the written ones; with
        `auto_decode=True` the scale factor and offset stored in the variable
        attributes must have been applied. Both read modes ('r_xarray' and
        'r_netcdf') are checked.
        """
        cube = np.ones((100, 100, 100), dtype=np.float32)
        dim_names = ['time', 'x', 'y']
        time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

        sig_attrs = {
            'unit': 'dB',
            'scale_factor': 2,
            'add_offset': 3,
            'fill_value': -9999
        }
        inc_attrs = {
            'unit': 'degree',
            'fill_value': -9999,
            'scale_factor': 2,
            'add_offset': 0
        }
        azi_attrs = {'unit': 'degree', 'fill_value': -9999}

        self.ds = xr.Dataset(
            {
                'sig': (dim_names, cube, sig_attrs),
                'inc': (dim_names, cube, inc_attrs),
                'azi': (dim_names, cube, azi_attrs)
            },
            coords={'time': time_index})

        with NcFile(self.filename, mode='w') as nc:
            nc.write(self.ds)

        var_names = ('sig', 'inc', 'azi')
        for read_mode in ('r_xarray', 'r_netcdf'):
            for decode in (False, True):
                if decode:
                    # scale factor and offset from the attributes applied
                    expected = {
                        'sig': self.ds['sig'][:] * 2 + 3,
                        'inc': self.ds['inc'][:] * 2,
                        'azi': self.ds['azi'][:]
                    }
                else:
                    # values exactly as written
                    expected = {name: self.ds[name][:] for name in var_names}

                with NcFile(self.filename, mode=read_mode,
                            auto_decode=decode) as nc:
                    ds = nc.read()
                    for name in var_names:
                        np.testing.assert_array_equal(ds[name][:],
                                                      expected[name])
Пример #6
0
    def write(self, ds):
        """
        Write a data set to one or several NetCDF stack files.

        If `self.stack_size` equals 'single', everything goes into one file
        named from prefix and suffix. Otherwise `self.stack_size` is used as
        a strftime pattern on the 'time' index and one file is written per
        distinct formatted value. Existing files are opened in append mode.

        Parameters
        ----------
        ds : xarray.Dataset
            Input data set.

        Returns
        -------
        pandas.DataFrame
            Data frame with a 'filenames' column listing the written files.
        """

        def _dump(target, subset):
            # append if the target is already on disk, otherwise create it
            file_mode = 'a' if os.path.exists(target) else 'w'
            with NcFile(target,
                        mode=file_mode,
                        complevel=self.compression,
                        geotransform=self.geotransform,
                        spatialref=self.spatialref,
                        chunksizes=self.chunksizes) as nc:
                nc.write(subset)

        if self.stack_size == 'single':
            basename = '{:}{:}'.format(self.fn_prefix, self.fn_suffix)
            target = os.path.join(self.out_path, basename)
            _dump(target, ds)
            return pd.DataFrame({'filenames': [target]})

        labels = ds['time'].to_index().strftime(self.stack_size)
        unique_labels, first_pos = np.unique(labels, return_index=True)
        # closing sentinel so that bounds[k]:bounds[k + 1] delimits group k
        bounds = np.hstack((first_pos, len(labels)))

        filenames = []
        for k, label in enumerate(unique_labels):
            basename = '{:}{:}{:}'.format(self.fn_prefix, label,
                                          self.fn_suffix)
            target = os.path.join(self.out_path, basename)
            filenames.append(target)

            time_sel = np.arange(bounds[k], bounds[k + 1])
            _dump(target, ds.isel(time=time_sel))

        return pd.DataFrame({'filenames': filenames})
Пример #7
0
    def write(self,
              ds,
              filepath,
              band=None,
              encoder=None,
              nodataval=None,
              encoder_kwargs=None,
              auto_scale=False):
        """
        Write a data set to a NetCDF file, appending if the file exists.

        Parameters
        ----------
        ds : xarray.Dataset
            Input data set.
        filepath : str
            Path of the target file. It is opened in append mode when it
            already exists and in write mode otherwise.
        band : optional
            Forwarded unchanged to `NcFile.write`.
        encoder : optional
            Forwarded unchanged to `NcFile.write`.
        nodataval : optional
            Forwarded unchanged to `NcFile.write`.
        encoder_kwargs : optional
            Forwarded unchanged to `NcFile.write`.
        auto_scale : bool, optional
            Accepted for interface compatibility; not used by this method.
        """
        file_mode = 'a' if os.path.exists(filepath) else 'w'

        nc_file = NcFile(filepath,
                         mode=file_mode,
                         complevel=self.compression,
                         geotrans=self.geotrans,
                         sref=self.sref,
                         chunksizes=self.chunksizes)
        with nc_file as nc:
            nc.write(ds,
                     band=band,
                     nodataval=nodataval,
                     encoder=encoder,
                     encoder_kwargs=encoder_kwargs)
Пример #8
0
 def _build_stack(self):
     """
     Open all files listed in the inventory as one multi-file xarray data
     set and store it in `self.mfdataset`.

     Raises
     ------
     RuntimeError
         If no inventory is set.
     """
     if self.inventory is None:
         raise RuntimeError('Building stack failed')

     if self._dims != 2:
         # files already carry the stacking coordinate
         self.mfdataset = xr.open_mfdataset(
             self.inventory.dropna()['filepath'].tolist(),
             chunks=self.chunks,
             combine='by_coords',
             mask_and_scale=self.auto_decode,
             use_cftime=False)
         return

     # two-dimensional files: concatenate along a new dimension named after
     # the inventory index and label it with the index values
     stacked = xr.open_mfdataset(
         self.inventory.dropna()['filepath'].tolist(),
         chunks=self.chunks,
         combine="nested",
         concat_dim=self.inventory.index.name,
         mask_and_scale=self.auto_decode,
         use_cftime=False)
     self.mfdataset = stacked.assign_coords(
         {self.inventory.index.name: self.inventory.index})

     gm_name = NcFile.get_gm_name(self.mfdataset)
     if gm_name is not None:
         # keep a single copy of the grid mapping variable instead of one
         # per stacked file
         self.mfdataset[gm_name] = self.mfdataset[gm_name].sel(
             **{self.inventory.index.name: 0}, drop=True)
Пример #9
0
    def write_netcdfs(self,
                      ds,
                      dir_path,
                      stack_size="%Y%m",
                      fn_prefix='',
                      fn_suffix='.nc'):
        """
        Split a data set along time and write each group to a NetCDF file.

        Parameters
        ----------
        ds : xarray.Dataset
            Input data set with a 'time' coordinate.
        dir_path : str
            Directory the files are written to.
        stack_size : str, optional
            Grouping rule (default "%Y%m"). A value containing '%' is used as
            a strftime pattern. A value without '%' that contains 'H', 'min'
            or 'T' is used as a frequency string to floor the timestamps
            (groups are inclusive left, exclusive right). Any other value is
            used as a strftime pattern.
        fn_prefix : str, optional
            Text put in front of the group label in the file name.
        fn_suffix : str, optional
            Text appended after the group label (default '.nc').

        Returns
        -------
        pandas.DataFrame
            Data frame with a 'filepath' column, indexed by the timestamp of
            each group.
        """
        # A '%' marks a strftime pattern. Without this guard a pattern such
        # as '%Y%m%d%H' contains 'H' and was wrongly handed to floor().
        # The decision is loop-invariant, so it is taken once up front.
        floor_by_freq = ('%' not in stack_size
                         and any(x in stack_size for x in ['H', 'min', 'T']))

        time_index = ds['time'].to_index()
        if floor_by_freq:
            # stacks are smaller than 1D
            dup_stack_filenames = time_index.floor(stack_size)
        else:
            dup_stack_filenames = time_index.strftime(stack_size)

        stack_filenames, index = np.unique(dup_stack_filenames,
                                           return_index=True)
        # closing sentinel so that index[i]:index[i + 1] delimits group i
        index = np.hstack((index, len(dup_stack_filenames)))

        filepaths = []
        timestamps = []
        for i, stack_filename in enumerate(stack_filenames):
            time_sel = np.arange(index[i], index[i + 1])

            # label of the first time step belonging to this group
            first_label = dup_stack_filenames[index[i]]
            if floor_by_freq:
                timestamp = first_label.to_datetime64()
                stack_filename = pd.to_datetime(
                    str(stack_filename)).strftime('%Y%m%d_%H%M%S')
            else:
                timestamp = datetime.strptime(first_label, stack_size)
            timestamps.append(timestamp)

            filename = '{:}{:}{:}'.format(fn_prefix, stack_filename, fn_suffix)
            filepath = os.path.join(dir_path, filename)
            filepaths.append(filepath)

            # append to files that already exist, create the others
            mode = 'a' if os.path.exists(filepath) else 'w'

            with NcFile(filepath,
                        mode=mode,
                        complevel=self.compression,
                        geotrans=self.geotrans,
                        sref=self.sref,
                        chunksizes=self.chunksizes) as nc:
                nc.write(ds.isel(time=time_sel))

        return pd.DataFrame({'filepath': filepaths}, index=timestamps)
Пример #10
0
    def test_time_units(self):
        """
        Write and read a data set with explicit time units and verify that
        the time stamps read back equal the ones written.
        """
        cube = np.ones((100, 100, 100), dtype=np.float32)
        dim_names = ['time', 'y', 'x']
        time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

        self.ds = xr.Dataset(
            {
                'sig': (dim_names, cube),
                'inc': (dim_names, cube)
            },
            coords={'time': time_index})

        time_units = 'days since 2000-01-01 00:00:00'
        with NcFile(self.filename, mode='w', time_units=time_units) as nc:
            nc.write(self.ds)

        with NcFile(self.filename, time_units=time_units) as nc:
            ds = nc.read()
            read_times = pd.DatetimeIndex(ds['time'].data)
            np.testing.assert_array_equal(read_times, time_index)
Пример #11
0
    def test_chunksizes(self):
        """
        Write a data set with explicit chunk sizes and verify that both
        variables report these chunk sizes when read in 'r_netcdf' mode.
        """
        cube = np.ones((100, 100, 100), dtype=np.float32)
        dim_names = ['time', 'y', 'x']
        time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

        self.ds = xr.Dataset(
            {
                'sig': (dim_names, cube),
                'inc': (dim_names, cube)
            },
            coords={'time': time_index})

        chunksizes = (100, 10, 10)
        with NcFile(self.filename, mode='w', chunksizes=chunksizes) as nc:
            nc.write(self.ds)

        with NcFile(self.filename, mode='r_netcdf') as nc:
            ds = nc.read()
            for var_name in ('sig', 'inc'):
                self.assertEqual(ds[var_name].data.chunksize, chunksizes)
Пример #12
0
    def _open(self):
        """
        Read reference information from the first available file of the
        inventory and build the file stack.

        Spatial reference, geotransform, metadata, shape and number of
        dimensions are taken from the first non-missing entry of the
        inventory's 'filepath' column. Nothing happens if no inventory is
        set.
        """
        if self.inventory is not None:
            # Indexing a data frame with a boolean frame only masks cells and
            # keeps every row, so it cannot skip a missing first entry.
            # Drop missing paths explicitly and select by position, which
            # works for any index type.
            ref_filepath = self.inventory['filepath'].dropna().iloc[0]
            with NcFile(ref_filepath, mode='r') as netcdf:
                self.sref = netcdf.sref
                self.geotrans = netcdf.geotrans
                self.metadata = netcdf.metadata
                # stack length is the full inventory, spatial extent comes
                # from the reference file
                self.shape = (len(self.inventory), netcdf.shape[-2],
                              netcdf.shape[-1])
                self._dims = len(netcdf.shape)

            self._build_stack()
Пример #13
0
def setup_nc_single_test_data():
    """
    Creates test data as a multi-time and multi-variable NetCDF file.

    The file is only generated if it does not exist yet.

    Returns
    -------
    str
        NetCDF test data filepath.
    list of pandas.Timestamp
        List of timestamps contained in the test data.
    """

    root_dirpath = os.path.join(dirpath_test(), 'data', 'Sentinel-1_CSAR')

    # create target folders
    dirpath = os.path.join(root_dirpath, 'IWGRDH', 'products', 'datasets', 'resampled', 'T0101', 'EQUI7_EU500M',
                           'E048N012T6', 'data')

    timestamps = [datetime(2016, 1, 1), datetime(2016, 2, 1), datetime(2017, 1, 1), datetime(2017, 2, 1)]

    var_names = ["SIG0", "GMR-"]
    directions = ["A", "D"]
    combs = itertools.product(var_names, directions, timestamps)

    rows, cols = np.meshgrid(np.arange(0, 1200), np.arange(0, 1200))
    data = rows + cols
    equi7 = Equi7Grid(500)
    tile_oi = equi7.EU.tilesys.create_tile(name="E042N012T6")

    filepath = os.path.join(dirpath, "D20160101_20170201_PREPRO---_S1AIWGRDH1VV-_146_T0101_EU500M_E048N012T6.nc")
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    if not os.path.exists(filepath):
        xr_dss = []
        for var_name, direction, timestamp in combs:
            tags = {'direction': direction}

            # offset the base pattern by the position of the time stamp so
            # that every time step holds distinguishable values
            data_i = data + timestamps.index(timestamp)
            xr_ar = xr.DataArray(data=data_i[None, :, :], coords={'time': [timestamp]},
                                 dims=['time', 'y', 'x'], attrs=tags)
            xr_dss.append(xr.Dataset(data_vars={var_name.strip('-'): xr_ar}))

        # merge before opening the target so that a failing merge does not
        # leave a file handle behind
        xr_ds = xr.merge(xr_dss)

        # the context manager closes the file even if writing fails
        with NcFile(filepath, mode='w', geotransform=tile_oi.geotransform(),
                    spatialref=tile_oi.get_geotags()['spatialreference']) as nc_file:
            nc_file.write(xr_ds)

    timestamps = [pd.Timestamp(timestamp.strftime("%Y%m%d")) for timestamp in timestamps]

    return filepath, timestamps
Пример #14
0
def setup_nc_multi_test_data():
    """
    Creates test data as single-time and single-variable NetCDF files.

    Files that already exist are not written again.

    Returns
    -------
    list of str
        List of NetCDF test data filepaths.
    list of pandas.Timestamp
        List of timestamps contained in the test data.
    """

    root_dirpath = os.path.join(dirpath_test(), 'data', 'Sentinel-1_CSAR')

    # create target folders
    dirpath = os.path.join(root_dirpath, 'IWGRDH', 'parameters', 'datasets', 'resampled', 'T0101', 'EQUI7_EU500M',
                           'E042N012T6', 'sig0')

    timestamps = [datetime(2016, 1, 1), datetime(2016, 2, 1), datetime(2017, 1, 1), datetime(2017, 2, 1)]

    pols = ["VV", "VH"]
    directions = ["A", "D"]
    filename_fmt = "D{}_000000--_SIG0-----_S1AIWGRDH1{}{}_146_T0101_EU500M_E042N012T6.nc"
    combs = itertools.product(pols, directions, timestamps)

    rows, cols = np.meshgrid(np.arange(0, 1200), np.arange(0, 1200))
    data = (rows + cols).astype(float)
    equi7 = Equi7Grid(500)
    tile_oi = equi7.EU.tilesys.create_tile(name="E042N012T6")

    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    filepaths = []
    for pol, direction, timestamp in combs:
        filename = filename_fmt.format(timestamp.strftime("%Y%m%d"), pol, direction)
        filepath = os.path.join(dirpath, filename)
        filepaths.append(filepath)

        if not os.path.exists(filepath):
            tags = {'direction': direction}
            # offset the base pattern by the position of the time stamp so
            # that every time step holds distinguishable values
            data_i = data + timestamps.index(timestamp)
            xr_ar = xr.DataArray(data=data_i[None, :, :], coords={'time': [timestamp]},
                                 dims=['time', 'y', 'x'])
            xr_ds = xr.Dataset(data_vars={'1': xr_ar}, attrs=tags)

            # the context manager closes the file even if writing fails
            with NcFile(filepath, mode='w', geotransform=tile_oi.geotransform(),
                        spatialref=tile_oi.get_geotags()['spatialreference']) as nc_file:
                nc_file.write(xr_ds)

    timestamps = [pd.Timestamp(timestamp.strftime("%Y%m%d")) for timestamp in timestamps]

    return filepaths, timestamps
Пример #15
0
    def read(self,
             row=None,
             col=None,
             n_rows=1,
             n_cols=1,
             band="1",
             nodataval=-9999,
             decoder=None,
             decoder_kwargs=None):
        """
        Read data from the NetCDF file stack.

        Parameters
        ----------
        row : int, optional
            Row number/index.
            If None and `col` is not None, all rows of the selected
            column(s) are loaded. If both are None, the whole extent is read.
        col : int, optional
            Column number/index.
            If None and `row` is not None, all columns of the selected
            row(s) are loaded. If both are None, the whole extent is read.
        n_rows : int, optional
            Number of rows to read (default is 1).
        n_cols : int, optional
            Number of columns to read (default is 1).
        band : str, optional
            Name of the data variable to read (default is "1").
        nodataval : optional
            No data value handed to `decoder` as second argument
            (default is -9999). Only used if a decoder is given.
        decoder : function, optional
            Decoding function, called as
            `decoder(data, nodataval, **decoder_kwargs)`.
        decoder_kwargs : dict, optional
            Keyword arguments for the decoder.

        Returns
        -------
        data : xarray.Dataset
            Data set holding the selected variable, post-processed by
            `self._fill_nan`.
        """

        decoder_kwargs = {} if decoder_kwargs is None else decoder_kwargs

        if row is None and col is None:  # read whole dataset
            row = 0
            col = 0
            n_rows = self.shape[-2]
            n_cols = self.shape[-1]
        elif row is None:  # only a column is given: read it over all rows
            row = 0
            n_rows = self.shape[-2]
        elif col is None:  # only a row is given: read it over all columns
            col = 0
            n_cols = self.shape[-1]

        row_slice = slice(row, row + n_rows)
        col_slice = slice(col, col + n_cols)
        if len(self.shape) == 3:
            # keep the complete leading (stack) dimension
            slices = (slice(None), row_slice, col_slice)
        else:
            slices = (row_slice, col_slice)

        data_ar = self.mfdataset[band][slices]
        if decoder:
            data_ar.data = decoder(data_ar.data, nodataval, **decoder_kwargs)
        data = data_ar.to_dataset()

        # numeric time values are converted to datetime objects
        if 'time' in data.dims and data.variables['time'].dtype == 'float':
            timestamps = netCDF4.num2date(data['time'],
                                          self.time_units,
                                          only_use_cftime_datetimes=False)
            data = data.assign_coords({'time': timestamps})

        # selecting a single variable drops the grid mapping variable,
        # so it is attached again
        gm_name = NcFile.get_gm_name(self.mfdataset)
        if gm_name is not None:
            data[gm_name] = self.mfdataset[gm_name]

        # carry over the global attributes
        data.attrs = self.mfdataset.attrs

        return self._fill_nan(data)