Example No. 1
# Module-level imports assumed from the surrounding datacube test suite:
#   import numpy as np
#   from affine import Affine
#   from datacube.utils.geometry import GeoBox
#   from datacube.model import Variable
# `create_netcdf_storage_unit` and the `epsg3577` CRS constant are also
# presumed to be imported/defined at module level.
def make_sample_netcdf(tmpdir):
    """Make a test Geospatial NetCDF file, 4000x4000 int16 random data, in a variable named `sample`.
    Return the GDAL access string."""
    sample_nc = str(tmpdir.mkdir('netcdfs').join('sample.nc'))
    geobox = GeoBox(4000,
                    4000,
                    affine=Affine(25.0, 0.0, 1200000, 0.0, -25.0, -4200000),
                    crs=epsg3577)

    sample_data = np.random.randint(10000, size=(4000, 4000), dtype=np.int16)

    variables = {
        'sample': Variable(sample_data.dtype,
                           nodata=-999,
                           dims=geobox.dimensions,
                           units=1)
    }
    nco = create_netcdf_storage_unit(sample_nc,
                                     geobox.crs,
                                     geobox.coordinates,
                                     variables=variables,
                                     variable_params={})

    nco['sample'][:] = sample_data

    nco.close()

    return 'NetCDF:"%s":sample' % sample_nc, geobox, sample_data
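
A minimal sketch of how the returned access string could be exercised, assuming GDAL's NetCDF driver is available through rasterio; the test function name and assertions are illustrative additions, not part of the original example:

import numpy as np
import rasterio  # assumption: rasterio built with GDAL NetCDF support

def test_sample_netcdf(tmpdir):
    access_str, geobox, original = make_sample_netcdf(tmpdir)
    # GDAL opens the 'NetCDF:"<path>":<variable>' access string directly
    with rasterio.open(access_str) as src:
        band = src.read(1)
    assert band.dtype == original.dtype      # int16
    assert band.shape == original.shape      # (4000, 4000)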
Example No. 2
# Relies on module-level imports and helpers from the surrounding application
# (datacube.api, pathlib.Path, get_temp_file, xr_apply, datasets_to_doc,
# write_data_variables, make_updated_tile, check_identical,
# build_history_string, DatacubeException).
def do_fixer_task(config, task):
    global_attributes = config['global_attributes']

    # Don't keep the original history if we are trying to fix it
    global_attributes['history'] = build_history_string(config,
                                                        task,
                                                        keep_original=False)

    variable_params = config['variable_params']

    output_filename = Path(task['output_filename'])
    output_uri = output_filename.absolute().as_uri()
    temp_filename = get_temp_file(output_filename)
    tile = task['tile']

    # Only use the time chunk size (e.g. 5), not spatial chunks.
    # This means the file only gets opened once per band, and all data is available when compressing on write.
    # 5 * 4000 * 4000 * 2 bytes ≈ 152 MiB, so memory usage is not an issue.
    chunk_profile = {'time': config['storage']['chunking']['time']}

    data = datacube.api.GridWorkflow.load(tile, dask_chunks=chunk_profile)

    unwrapped_datasets = xr_apply(tile.sources,
                                  _unwrap_dataset_list,
                                  dtype='O')
    data['dataset'] = datasets_to_doc(unwrapped_datasets)

    try:
        if data.geobox is None:
            raise DatacubeException(
                'Dataset geobox property is None, cannot write to NetCDF file.'
            )

        if data.geobox.crs is None:
            raise DatacubeException(
                'Dataset geobox.crs property is None, cannot write to NetCDF file.'
            )

        nco = create_netcdf_storage_unit(temp_filename, data.geobox.crs,
                                         data.coords, data.data_vars,
                                         variable_params, global_attributes)
        write_data_variables(data.data_vars, nco)
        nco.close()

        temp_filename.rename(output_filename)

        if config.get('check_data_identical', False):
            new_tile = make_updated_tile(unwrapped_datasets, output_uri,
                                         tile.geobox)
            new_data = datacube.api.GridWorkflow.load(
                new_tile, dask_chunks=chunk_profile)
            check_identical(data, new_data, output_filename)

    except Exception:
        if temp_filename.exists():
            temp_filename.unlink()
        raise

    return unwrapped_datasets, output_uri
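
Both this task and `do_stack_task` below pass `_unwrap_dataset_list` to `xr_apply`, but the helper itself is not shown. A plausible sketch, assuming each cell of `tile.sources` holds exactly one Dataset per time slice:

def _unwrap_dataset_list(labels, dataset_list):
    # xr_apply invokes this with the coordinate labels and the cell value;
    # tuple unpacking asserts the cell holds exactly one dataset.
    dataset, = dataset_list
    return dataset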
Example No. 3
# Assumes the same module-level imports and helpers as Example No. 2,
# plus get_history_attribute.
def do_stack_task(config, task):
    global_attributes = config['global_attributes']
    global_attributes['history'] = get_history_attribute(config, task)

    variable_params = config['variable_params']

    # Compression settings for the serialised per-time-slice dataset documents
    variable_params['dataset'] = {
        'chunksizes': (1, ),
        'zlib': True,
        'complevel': 9,
    }

    output_filename = Path(task['output_filename'])
    output_uri = output_filename.absolute().as_uri()
    temp_filename = get_temp_file(output_filename)
    tile = task['tile']

    # Only use the time chunk size (e.g. 5), not spatial chunks.
    # This means the file only gets opened once per band, and all data is available when compressing on write.
    # 5 * 4000 * 4000 * 2 bytes ≈ 152 MiB, so memory usage is not an issue.
    chunk_profile = {'time': config['storage']['chunking']['time']}

    data = datacube.api.GridWorkflow.load(tile, dask_chunks=chunk_profile)

    unwrapped_datasets = xr_apply(tile.sources,
                                  _unwrap_dataset_list,
                                  dtype='O')
    data['dataset'] = datasets_to_doc(unwrapped_datasets)

    try:
        nco = create_netcdf_storage_unit(temp_filename, data.crs, data.coords,
                                         data.data_vars, variable_params,
                                         global_attributes)
        write_data_variables(data.data_vars, nco)
        nco.close()

        temp_filename.rename(output_filename)

        if config.get('check_data_identical', False):
            new_tile = make_updated_tile(unwrapped_datasets, output_uri,
                                         tile.geobox)
            new_data = datacube.api.GridWorkflow.load(
                new_tile, dask_chunks=chunk_profile)
            check_identical(data, new_data, output_filename)

    except Exception:
        if temp_filename.exists():
            temp_filename.unlink()
        raise

    return unwrapped_datasets, output_uri
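
Both tasks write to a temporary file and rename it into place only on success, so a failed run never leaves a partial output at the target path. The `get_temp_file` helper is not shown in these examples; a minimal sketch, assuming it only needs to derive a unique sibling path:

import uuid
from pathlib import Path

def get_temp_file(final_path: Path) -> Path:
    # Keeping the temp file in the target directory means the final
    # rename() stays on one filesystem and is atomic on POSIX.
    return final_path.parent / '.tmp-{}-{}'.format(uuid.uuid4().hex, final_path.name)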
Example No. 4
    def _nco_from_sources(self, sources, geobox, measurements, variable_params, filename):
        """Create a NetCDF storage unit for `measurements`, combining the
        non-spatial coordinates of `sources` (e.g. time) with the spatial
        coordinates of `geobox`. Assumes `OrderedDict`, `numpy`, `geometry`,
        `Variable` and `create_netcdf_storage_unit` are imported at module
        level."""
        coordinates = OrderedDict((name, geometry.Coordinate(coord.values, coord.units))
                                  for name, coord in sources.coords.items())
        coordinates.update(geobox.coordinates)

        variables = OrderedDict((variable['name'], Variable(dtype=numpy.dtype(variable['dtype']),
                                                            nodata=variable['nodata'],
                                                            dims=sources.dims + geobox.dimensions,
                                                            units=variable['units']))
                                for variable in measurements)

        return create_netcdf_storage_unit(filename, crs=geobox.crs, coordinates=coordinates,
                                          variables=variables, variable_params=variable_params,
                                          global_attributes=self.global_attributes)
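
An illustrative call, assuming `writer` is an instance of the class above with `global_attributes` set and `data` is a loaded tile exposing `sources` and `geobox`; every literal below is made up for the sketch, but matches the keys the method reads from each measurement (`name`, `dtype`, `nodata`, `units`):

measurements = [
    {'name': 'red', 'dtype': 'int16', 'nodata': -999, 'units': '1'},
    {'name': 'nir', 'dtype': 'int16', 'nodata': -999, 'units': '1'},
]
variable_params = {
    'red': {'zlib': True, 'complevel': 4},
    'nir': {'zlib': True, 'complevel': 4},
}
nco = writer._nco_from_sources(data.sources, data.geobox,
                               measurements, variable_params, 'output.nc')
nco.close()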