# These excerpts assume the surrounding test module's top-level imports and helpers
# (not shown), e.g. warnings, numpy as np, rasterio, click.testing.CliRunner,
# rasterio.enums.Resampling, rasterio.io.MemoryFile, rasterio.shutil.copy, and the
# local format_exception and BASE_PROFILE helpers.
def test_optimize_rasters_small(tiny_raster_file, tmpdir):
    from terracotta.cog import validate
    from terracotta.scripts import cli

    input_pattern = str(tiny_raster_file)
    outfile = tmpdir / tiny_raster_file.basename

    runner = CliRunner()
    result = runner.invoke(
        cli.cli, ['optimize-rasters', input_pattern, '-o',
                  str(tmpdir)])

    assert result.exit_code == 0, format_exception(result)
    assert outfile.check()

    # validate files
    # (small rasters don't need overviews, so input file is valid, too)
    assert validate(str(tiny_raster_file))
    assert validate(str(outfile))

    # check for data integrity
    with rasterio.open(str(tiny_raster_file)) as src1, rasterio.open(
            str(outfile)) as src2:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'invalid value encountered.*')
            np.testing.assert_array_equal(src1.read(), src2.read())


# in_memory, reproject, compression, and nproc are parameters supplied by the
# surrounding test module (e.g. via pytest.mark.parametrize); they are not shown here.
def test_optimize_rasters(unoptimized_raster_file, tmpdir, in_memory,
                          reproject, compression, nproc):
    from terracotta.cog import validate
    from terracotta.scripts import cli

    input_pattern = str(unoptimized_raster_file.dirpath('*.tif'))
    outfile = tmpdir / unoptimized_raster_file.basename

    runner = CliRunner()

    flags = ['--compression', compression, '-q']

    if in_memory is not None:
        flags.append('--in-memory' if in_memory else '--no-in-memory')

    if reproject:
        flags.append('--reproject')

    if nproc is not None:
        flags.append(f'--nproc={nproc}')

    result = runner.invoke(
        cli.cli,
        ['optimize-rasters', input_pattern, '-o',
         str(tmpdir), *flags])

    assert result.exit_code == 0, format_exception(result)
    assert outfile.check()

    # validate files
    assert not validate(str(unoptimized_raster_file))
    assert validate(str(outfile))

    if reproject:
        return

    # check for data integrity
    with rasterio.open(str(unoptimized_raster_file)) as src1, rasterio.open(
            str(outfile)) as src2:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'invalid value encountered.*')
            np.testing.assert_array_equal(src1.read(), src2.read())
Example #3
def test_validate_optimized_small(tmpdir):
    from terracotta import cog

    outfile = str(tmpdir / 'raster.tif')
    raster_data = (1000 * np.random.rand(128, 128)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0], width=raster_data.shape[1])

    with rasterio.open(outfile, 'w', **profile) as dst:
        dst.write(raster_data, 1)

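    # Small rasters do not need internal tiling or overviews to pass COG validation.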
    assert cog.validate(outfile)
Example #4
def test_validate_not_gtiff(tmpdir):
    from terracotta import cog

    outfile = str(tmpdir / 'raster.png')
    raster_data = (1000 * np.random.rand(512, 512)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0],
                   width=raster_data.shape[1],
                   driver='PNG')

    with rasterio.open(outfile, 'w', **profile) as dst:
        dst.write(raster_data, 1)

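    # Only GeoTIFF files can be cloud-optimized, so a PNG never validates.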
    assert not cog.validate(outfile)
Example #5
def test_validate_not_tiled(tmpdir):
    from terracotta import cog

    outfile = str(tmpdir / 'raster.tif')
    raster_data = (1000 * np.random.rand(512, 512)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0], width=raster_data.shape[1])

    with rasterio.open(outfile, 'w', **profile) as dst:
        dst.write(raster_data, 1)

        overviews = [2**j for j in range(1, 4)]
        dst.build_overviews(overviews, Resampling.nearest)

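    # Overviews alone are not enough: the base raster is not internally tiled,
    # so validation fails.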
    assert not cog.validate(outfile)
Example #6
def test_validate_no_overviews(tmpdir):
    from terracotta import cog

    outfile = str(tmpdir / 'raster.tif')
    raster_data = (1000 * np.random.rand(1024, 1024)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0],
                   width=raster_data.shape[1],
                   tiled=True,
                   blockxsize=256,
                   blockysize=256)

    with rasterio.open(outfile, 'w', **profile) as dst:
        dst.write(raster_data, 1)

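    # Tiled, but a raster this large needs overviews, so validation fails.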
    assert not cog.validate(outfile)
Example #7
def test_validate_optimized(tmpdir):
    from terracotta import cog

    outfile = str(tmpdir / 'raster.tif')
    raster_data = (1000 * np.random.rand(512, 512)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0],
                   width=raster_data.shape[1],
                   tiled=True,
                   blockxsize=256,
                   blockysize=256)

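    # Write the raster to an in-memory file, build internal overviews, then copy it
    # to disk with copy_src_overviews=True so the overviews are carried over.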
    with MemoryFile() as mf, mf.open(**profile) as dst:
        dst.write(raster_data, 1)

        overviews = [2**j for j in range(1, 4)]
        dst.build_overviews(overviews, Resampling.nearest)

        copy(dst, outfile, copy_src_overviews=True, **profile)

    assert cog.validate(outfile)
Example #8
def test_validate_external_overview(tmpdir):
    import os
    from terracotta import cog

    outfile = str(tmpdir / 'raster.tif')
    raster_data = (1000 * np.random.rand(512, 512)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0],
                   width=raster_data.shape[1],
                   tiled=True,
                   blockxsize=256,
                   blockysize=256)

    with rasterio.Env(TIFF_USE_OVR=True):
        with rasterio.open(outfile, 'w', **profile) as dst:
            dst.write(raster_data, 1)

            overviews = [2**j for j in range(1, 4)]
            dst.build_overviews(overviews, Resampling.nearest)

        assert os.path.isfile(f'{outfile}.ovr')

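    # The overviews live in an external .ovr sidecar instead of inside the GeoTIFF,
    # so the file is not a valid COG.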
    assert not cog.validate(outfile)
Example #9
    def compute_metadata(cls, raster_path: str, *,
                         extra_metadata: Any = None,
                         use_chunks: bool = None,
                         max_shape: Sequence[int] = None) -> Dict[str, Any]:
        """Read given raster file and compute metadata from it.

        This handles most of the heavy lifting during raster ingestion. The returned metadata can
        be passed directly to :meth:`insert`.

        Arguments:

            raster_path: Path to GDAL-readable raster file
            extra_metadata: Any additional metadata to attach to the dataset. Will be
                JSON-serialized and returned verbatim by :meth:`get_metadata`.
            use_chunks: Whether to process the image in chunks (slower, but uses less memory).
                If not given, use chunks for large images only.
            max_shape: Maximum number of pixels to use along each dimension when computing
                metadata. Setting this to a relatively small size such as ``(1024, 1024)``
                makes metadata computation much faster for large images, at the expense of
                accuracy.

        """
        import rasterio
        from rasterio import warp
        from terracotta.cog import validate

        row_data: Dict[str, Any] = {}
        extra_metadata = extra_metadata or {}

        if max_shape is not None and len(max_shape) != 2:
            raise ValueError('max_shape argument must contain 2 values')

        if use_chunks and max_shape is not None:
            raise ValueError('Cannot use both use_chunks and max_shape arguments')

        with rasterio.Env(**cls._RIO_ENV_KEYS):
            if not validate(raster_path):
                warnings.warn(
                    f'Raster file {raster_path} is not a valid cloud-optimized GeoTIFF. '
                    'Any interaction with it will be significantly slower. Consider optimizing '
                    'it through `terracotta optimize-rasters` before ingestion.',
                    exceptions.PerformanceWarning, stacklevel=3
                )

            with rasterio.open(raster_path) as src:
                if src.nodata is None and not cls._has_alpha_band(src):
                    warnings.warn(
                        f'Raster file {raster_path} does not have a valid nodata value, '
                        'and does not contain an alpha band. No data will be masked.'
                    )

                bounds = warp.transform_bounds(
                    src.crs, 'epsg:4326', *src.bounds, densify_pts=21
                )

                if use_chunks is None and max_shape is None:
                    use_chunks = src.width * src.height > RasterDriver._LARGE_RASTER_THRESHOLD

                    if use_chunks:
                        logger.debug(
                            f'Computing metadata for file {raster_path} using more than '
                            f'{RasterDriver._LARGE_RASTER_THRESHOLD // 10**6}M pixels, iterating '
                            'over chunks'
                        )

                if use_chunks and not has_crick:
                    warnings.warn(
                        'Processing a large raster file, but crick failed to import. '
                        'Reading whole file into memory instead.', exceptions.PerformanceWarning
                    )
                    use_chunks = False

                if use_chunks:
                    raster_stats = RasterDriver._compute_image_stats_chunked(src)
                else:
                    raster_stats = RasterDriver._compute_image_stats(src, max_shape)

        if raster_stats is None:
            raise ValueError(f'Raster file {raster_path} does not contain any valid data')

        row_data.update(raster_stats)

        row_data['bounds'] = bounds
        row_data['metadata'] = extra_metadata

        return row_data
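
A minimal usage sketch for the method above, based on its docstring. The database path, key name, raster path, and extra metadata are illustrative assumptions, not part of the original code:

import terracotta

# Hypothetical SQLite database, key, and raster path (illustration only).
driver = terracotta.get_driver('terracotta.sqlite', provider='sqlite')
driver.create(keys=['name'])

# Cap metadata computation at 1024x1024 pixels to trade accuracy for speed.
metadata = driver.compute_metadata(
    'dem.tif',
    extra_metadata={'source': 'survey'},
    max_shape=(1024, 1024),
)

# As the docstring notes, the result can be passed directly to `insert`.
with driver.connect():
    driver.insert({'name': 'dem'}, 'dem.tif', metadata=metadata)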
Example #10
def compute_metadata(path: str,
                     *,
                     extra_metadata: Any = None,
                     use_chunks: bool = None,
                     max_shape: Sequence[int] = None,
                     large_raster_threshold: int = None,
                     rio_env_options: Dict[str, Any] = None) -> Dict[str, Any]:
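    """Read the given raster file and compute Terracotta metadata from it.

    Variant of the driver method above with explicit configuration: automatic chunking
    kicks in only when ``large_raster_threshold`` is given, and GDAL environment options
    are supplied via ``rio_env_options`` rather than class-level defaults.
    """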
    import rasterio
    from rasterio import warp
    from terracotta.cog import validate

    row_data: Dict[str, Any] = {}
    extra_metadata = extra_metadata or {}

    if max_shape is not None and len(max_shape) != 2:
        raise ValueError('max_shape argument must contain 2 values')

    if use_chunks and max_shape is not None:
        raise ValueError('Cannot use both use_chunks and max_shape arguments')

    if rio_env_options is None:
        rio_env_options = {}

    with rasterio.Env(**rio_env_options):
        if not validate(path):
            warnings.warn(
                f'Raster file {path} is not a valid cloud-optimized GeoTIFF. '
                'Any interaction with it will be significantly slower. Consider optimizing '
                'it through `terracotta optimize-rasters` before ingestion.',
                exceptions.PerformanceWarning,
                stacklevel=3)

        with rasterio.open(path) as src:
            if src.nodata is None and not has_alpha_band(src):
                warnings.warn(
                    f'Raster file {path} does not have a valid nodata value, '
                    'and does not contain an alpha band. No data will be masked.'
                )

            bounds = warp.transform_bounds(src.crs,
                                           'epsg:4326',
                                           *src.bounds,
                                           densify_pts=21)

            if use_chunks is None and max_shape is None and large_raster_threshold is not None:
                use_chunks = src.width * src.height > large_raster_threshold

                if use_chunks:
                    logger.debug(
                        f'Computing metadata for file {path} using more than '
                        f'{large_raster_threshold // 10**6}M pixels, iterating '
                        'over chunks')

            if use_chunks and not has_crick:
                warnings.warn(
                    'Processing a large raster file, but crick failed to import. '
                    'Reading whole file into memory instead.',
                    exceptions.PerformanceWarning)
                use_chunks = False

            if use_chunks:
                raster_stats = compute_image_stats_chunked(src)
            else:
                raster_stats = compute_image_stats(src, max_shape)

    if raster_stats is None:
        raise ValueError(f'Raster file {path} does not contain any valid data')

    row_data.update(raster_stats)

    row_data['bounds'] = bounds
    row_data['metadata'] = extra_metadata
    return row_data
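
A usage sketch for the stand-alone function above. The raster path, threshold, and GDAL option below are illustrative assumptions:

# Hypothetical call: automatic chunking kicks in only above the given threshold
# (50 million pixels here) and only if crick is importable.
metadata = compute_metadata(
    'dem.tif',
    extra_metadata={'source': 'survey'},
    large_raster_threshold=50 * 10**6,
    rio_env_options={'GDAL_DISABLE_READDIR_ON_OPEN': 'EMPTY_DIR'},
)
print(metadata['bounds'], metadata['metadata'])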