def read_data(self, tile_specifications, file_path, output_tile):
        """Yield a NexusTile carrying a TimeSeriesTile for each tile spec.

        Opens *file_path* as a netCDF ``Dataset`` and, for every
        (section_spec, dimension->slice) pair, slices out latitude,
        longitude, time, the main variable and optional metadata,
        converting masked values to NaN before serializing.
        """
        with Dataset(file_path) as ds:
            for section_spec, dimtoslice in tile_specifications:
                tile = nexusproto.TimeSeriesTile()

                # The single non-time dimension indexes the instances the
                # lat/lon arrays vary over (e.g. stations / river reaches).
                instance_dimension = next(
                    dim for dim in ds[self.variable_to_read].dimensions if dim != self.time)

                # numpy.NaN was removed in NumPy 2.0; numpy.nan is the
                # canonical spelling and identical in older releases.
                tile.latitude.CopyFrom(
                    to_shaped_array(numpy.ma.filled(ds[self.latitude][dimtoslice[instance_dimension]], numpy.nan)))

                tile.longitude.CopyFrom(
                    to_shaped_array(numpy.ma.filled(ds[self.longitude][dimtoslice[instance_dimension]], numpy.nan)))

                # Before we read the data we need to make sure the dimensions are in the proper order so we don't
                # have any indexing issues
                ordered_slices = get_ordered_slices(ds, self.variable_to_read, dimtoslice)
                # Read data using the ordered slices, replacing masked values with NaN
                data_array = numpy.ma.filled(ds[self.variable_to_read][tuple(ordered_slices.values())], numpy.nan)

                tile.variable_data.CopyFrom(to_shaped_array(data_array))

                if self.metadata is not None:
                    tile.meta_data.add().CopyFrom(
                        to_metadata(self.metadata, ds[self.metadata][tuple(ordered_slices.values())]))

                tile.time.CopyFrom(
                    to_shaped_array(numpy.ma.filled(ds[self.time][dimtoslice[self.time]], numpy.nan)))

                output_tile.tile.time_series_tile.CopyFrom(tile)

                yield output_tile
    def test_run_chain_partial_empty(self):
        """The chain yields one summarized tile (with bbox) for the region
        containing data, and nothing for the fully empty region."""
        chain = ProcessorChain([
            {'name': 'GridReadingProcessor',
             'config': {'latitude': 'lat',
                        'longitude': 'lon',
                        'time': 'time',
                        'variable_to_read': 'analysed_sst'}},
            {'name': 'EmptyTileFilter', 'config': {}},
            {'name': 'KelvinToCelsius', 'config': {}},
            {'name': 'TileSummarizingProcessor', 'config': {}},
        ])

        granule = path.join(path.dirname(__file__), 'datafiles',
                            'partial_empty_mur.nc4')

        def build_input(section_spec):
            # Build a NexusTile whose summary points at the test granule.
            summary = nexusproto.TileSummary()
            summary.granule = "file:%s" % granule
            summary.section_spec = section_spec
            nexus_tile = nexusproto.NexusTile()
            nexus_tile.summary.CopyFrom(summary)
            return nexus_tile

        # Region with valid data: a single tile passes through with a bbox.
        results = list(chain.process(build_input("time:0:1,lat:489:499,lon:0:10")))
        self.assertEqual(1, len(results))
        self.assertTrue(results[0].summary.HasField('bbox'), "bbox is missing")

        # Fully empty region: the filter drops everything.
        results = list(chain.process(build_input("time:0:1,lat:0:10,lon:0:10")))
        self.assertEqual(0, len(results))
    def test_read_not_empty_mur(self):
        """A 1x10x10 MUR section reads fully populated (100 valid values)."""
        granule = path.join(path.dirname(__file__), 'datafiles',
                            'not_empty_mur.nc4')

        summary = nexusproto.TileSummary()
        summary.granule = "file:%s" % granule
        summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
        nexus_tile = nexusproto.NexusTile()
        nexus_tile.summary.CopyFrom(summary)

        results = list(self.module.process(nexus_tile))

        self.assertEqual(1, len(results))

        data = np.ma.masked_invalid(
            from_shaped_array(results[0].tile.grid_tile.variable_data))
        self.assertEqual((1, 10, 10), data.shape)
        self.assertEqual(100, np.ma.count(data))
Пример #4
0
    def process_nexus_tile(self, nexus_tile):
        """Compute bounding box and data statistics for *nexus_tile*.

        Populates (or refreshes) the tile's TileSummary with lat/lon
        extents, min/max/mean/count of the variable data, and the time
        range when one is available, then yields the updated tile.
        """
        tile_type = nexus_tile.tile.WhichOneof("tile_type")
        tile_data = getattr(nexus_tile.tile, tile_type)

        lats = numpy.ma.masked_invalid(from_shaped_array(tile_data.latitude))
        lons = numpy.ma.masked_invalid(from_shaped_array(tile_data.longitude))
        data = from_shaped_array(tile_data.variable_data)

        if nexus_tile.HasField("summary"):
            summary = nexus_tile.summary
        else:
            summary = nexusproto.TileSummary()

        summary.bbox.lat_min = numpy.nanmin(lats).item()
        summary.bbox.lat_max = numpy.nanmax(lats).item()
        summary.bbox.lon_min = numpy.nanmin(lons).item()
        summary.bbox.lon_max = numpy.nanmax(lons).item()

        summary.stats.min = numpy.nanmin(data).item()
        summary.stats.max = numpy.nanmax(data).item()

        # In order to accurately calculate the average we need to weight the data based on the cosine of its latitude
        # This is handled slightly differently for swath vs. grid data
        if tile_type == 'swath_tile':
            # Swath: data, latitudes and longitudes are aligned 1:1, so each
            # datum is weighted by its own latitude directly.
            summary.stats.mean = numpy.ma.average(
                numpy.ma.masked_invalid(data),
                weights=numpy.cos(numpy.radians(lats))).item()
        elif tile_type == 'grid_tile':
            # Grid: repeat each latitude weight across every longitude.
            # TODO This assumes data axis' are ordered as latitude x longitude
            summary.stats.mean = numpy.ma.average(
                numpy.ma.masked_invalid(data).flatten(),
                weights=numpy.cos(numpy.radians(
                    numpy.repeat(lats, len(lons))))).item()
        else:
            # Unknown layout: fall back to an unweighted mean.
            summary.stats.mean = numpy.nanmean(data).item()

        summary.stats.count = data.size - numpy.count_nonzero(numpy.isnan(data))

        try:
            min_time, max_time = find_time_min_max(tile_data)
            summary.stats.min_time = min_time
            summary.stats.max_time = max_time
        except NoTimeException:
            pass

        # stored_var_name may be None, which protobuf rejects with TypeError.
        try:
            summary.data_var_name = self.stored_var_name
        except TypeError:
            pass

        nexus_tile.summary.CopyFrom(summary)
        yield nexus_tile
    def test_run_chain_promote_var(self):
        """PromoteVariableToGlobalAttribute surfaces the tile's time value
        as a global attribute on the summarized output tile."""
        chain = ProcessorChain([
            {'name': 'GridReadingProcessor',
             'config': {'latitude': 'lat',
                        'longitude': 'lon',
                        'time': 'time',
                        'variable_to_read': 'analysed_sst'}},
            {'name': 'EmptyTileFilter', 'config': {}},
            {'name': 'KelvinToCelsius', 'config': {}},
            {'name': 'PromoteVariableToGlobalAttribute',
             'config': {'attribute_name': 'time_i',
                        'variable_name': 'time',
                        'dimensioned_by.0': 'time'}},
            {'name': 'TileSummarizingProcessor', 'config': {}},
        ])

        granule = path.join(path.dirname(__file__), 'datafiles',
                            'partial_empty_mur.nc4')

        summary = nexusproto.TileSummary()
        summary.granule = "file:%s" % granule
        summary.section_spec = "time:0:1,lat:489:499,lon:0:10"
        nexus_tile = nexusproto.NexusTile()
        nexus_tile.summary.CopyFrom(summary)

        results = list(chain.process(nexus_tile))

        self.assertEqual(1, len(results))
        self.assertEqual("1104483600",
                         results[0].summary.global_attributes[0].values[0])
    def test_read_not_empty_wswm(self):
        """A WSWM granule yields one tile with the full 5832-step time
        series for a single river reach."""
        granule = path.join(path.dirname(__file__), 'datafiles', 'not_empty_wswm.nc')

        reader = sdap.processors.TimeSeriesReadingProcessor('Qout', 'lat', 'lon', 'time')

        summary = nexusproto.TileSummary()
        summary.granule = "file:%s" % granule
        summary.section_spec = "time:0:5832,rivid:0:1"
        nexus_tile = nexusproto.NexusTile()
        nexus_tile.summary.CopyFrom(summary)

        results = list(reader.process(nexus_tile))

        self.assertEqual(1, len(results))

        for result in results:
            self.assertTrue(result.HasField('tile'))
            self.assertTrue(result.tile.HasField('time_series_tile'))

            ts_tile = result.tile.time_series_tile
            self.assertEqual(1, from_shaped_array(ts_tile.latitude).size)
            self.assertEqual(1, from_shaped_array(ts_tile.longitude).size)
            self.assertEqual((5832, 1), from_shaped_array(ts_tile.variable_data).shape)

        first_tile = results[0].tile.time_series_tile
        data = np.ma.masked_invalid(from_shaped_array(first_tile.variable_data))
        self.assertEqual(5832, np.ma.count(data))
        self.assertAlmostEqual(
            45.837,
            np.ma.min(np.ma.masked_invalid(from_shaped_array(first_tile.latitude))),
            places=3)
        self.assertAlmostEqual(
            -122.789,
            np.ma.max(np.ma.masked_invalid(from_shaped_array(first_tile.longitude))),
            places=3)

        times = from_shaped_array(first_tile.time)
        self.assertEqual(852098400, times[0])
        self.assertEqual(915073200, times[-1])
        self.assertAlmostEqual(1.473, data[0, 0], places=3)
    def test_read_not_empty_smap(self):
        """A SMAP swath section reads 76 coordinate points, no metadata, and
        43 valid salinity values."""
        granule = path.join(path.dirname(__file__), 'datafiles', 'not_empty_smap.h5')

        reader = sdap.processors.SwathReadingProcessor('smap_sss', 'lat', 'lon',
                                                       time='row_time',
                                                       glblattr_day='REV_START_TIME',
                                                       glblattr_day_format='%Y-%jT%H:%M:%S.%f')

        summary = nexusproto.TileSummary()
        summary.granule = "file:%s" % granule
        summary.section_spec = "phony_dim_0:0:76,phony_dim_1:0:1"
        nexus_tile = nexusproto.NexusTile()
        nexus_tile.summary.CopyFrom(summary)

        results = list(reader.process(nexus_tile))

        self.assertEqual(1, len(results))

        for result in results:
            self.assertTrue(result.HasField('tile'))
            self.assertTrue(result.tile.HasField('swath_tile'))
            self.assertEqual(0, len(result.tile.swath_tile.meta_data))

            swath = result.tile.swath_tile
            self.assertEqual(76, from_shaped_array(swath.latitude).size)
            self.assertEqual(76, from_shaped_array(swath.longitude).size)

        first_tile = results[0].tile.swath_tile
        data = np.ma.masked_invalid(from_shaped_array(first_tile.variable_data))
        self.assertEqual((76, 1), data.shape)
        self.assertEqual(43, np.ma.count(data))
        lats = np.ma.masked_invalid(from_shaped_array(first_tile.latitude))
        self.assertAlmostEqual(-50.056, np.ma.min(lats), places=3)
        self.assertAlmostEqual(-47.949, np.ma.max(lats), places=3)

        self.assertEqual(1427820162,
                         np.ma.masked_invalid(from_shaped_array(first_tile.time))[0])
    def process_nexus_tile(self, input_tile):
        """Read data for *input_tile* and yield one populated tile per spec.

        Parses the tile's section spec (staging the granule into
        ``self.temp_dir`` when one is configured), streams the tiles
        produced by ``read_data``, and removes the temporary file
        afterwards.
        """
        tile_specifications, file_path = parse_input(input_tile, self.temp_dir)

        output_tile = nexusproto.NexusTile()
        output_tile.CopyFrom(input_tile)

        try:
            for tile in self.read_data(tile_specifications, file_path, output_tile):
                yield tile
        finally:
            # Clean up the temporary file even if read_data raises or the
            # consumer abandons the generator early (previously the file
            # leaked in both cases). Only applies when a temp dir is set.
            if self.temp_dir is not None:
                remove(file_path)
Пример #9
0
def to_shaped_array(data_array):
    """Serialize a numpy array into a nexusproto ShapedArray message.

    Stores the shape and dtype alongside the raw ``numpy.save`` payload so
    the array can be reconstructed later.
    """
    # numpy.save writes binary data; the original StringIO buffer fails
    # under Python 3, so use an in-memory bytes buffer instead.
    from io import BytesIO

    shaped_array = nexusproto.ShapedArray()

    shaped_array.shape.extend(data_array.shape)
    shaped_array.dtype = str(data_array.dtype)

    with BytesIO() as memfile:
        numpy.save(memfile, data_array)
        shaped_array.array_data = memfile.getvalue()

    return shaped_array
Пример #10
0
    def read_data(self, tile_specifications, file_path, output_tile):
        """Yield a NexusTile carrying a SwathTile for each tile specification.

        Opens *file_path* with xarray (CF-decoded, but with raw numeric time
        values) and, for every (section_spec, dimension->slice) pair, slices
        out latitude, longitude, time, the main variable and optional
        metadata, converting masked values to NaN and time values to seconds
        since the Unix epoch.
        """
        with xr.decode_cf(xr.open_dataset(file_path, decode_cf=False),
                          decode_times=False) as ds:
            for section_spec, dimtoslice in tile_specifications:
                tile = nexusproto.SwathTile()
                # Time Lat Long Data and metadata should all be indexed by the same dimensions, order the incoming spec once using the data variable
                ordered_slices = get_ordered_slices(ds, self.variable_to_read,
                                                    dimtoslice)
                tile.latitude.CopyFrom(
                    to_shaped_array(
                        numpy.ma.filled(
                            ds[self.latitude].data[tuple(
                                ordered_slices.values())], numpy.NaN)))
                tile.longitude.CopyFrom(
                    to_shaped_array(
                        numpy.ma.filled(
                            ds[self.longitude].data[tuple(
                                ordered_slices.values())], numpy.NaN)))

                # Slice time by only the time variable's own dimensions and
                # promote it to float64 so epoch seconds can be written back
                # into the same array below.
                timetile = ds[self.time][tuple([
                    ordered_slices[time_dim] for time_dim in ds[self.time].dims
                ])].astype('float64', casting='same_kind', copy=False)
                timeunits = ds[self.time].attrs['units']
                # The start-of-day reference comes from a global attribute;
                # if it is missing or malformed, proceed without it.
                try:
                    start_of_day_date = datetime.datetime.strptime(
                        ds.attrs[self.start_of_day], self.start_of_day_pattern)
                except Exception:
                    start_of_day_date = None

                # Convert every time value, element by element, to seconds
                # since the Unix epoch.
                for index in numpy.ndindex(timetile.shape):
                    timetile[index] = to_seconds_from_epoch(
                        timetile[index].item(),
                        timeunits=timeunits,
                        start_day=start_of_day_date,
                        timeoffset=self.time_offset)

                tile.time.CopyFrom(to_shaped_array(timetile))

                # Read the data converting masked values to NaN
                data_array = numpy.ma.filled(
                    ds[self.variable_to_read].data[tuple(
                        ordered_slices.values())], numpy.NaN)
                tile.variable_data.CopyFrom(to_shaped_array(data_array))

                if self.metadata is not None:
                    tile.meta_data.add().CopyFrom(
                        to_metadata(
                            self.metadata, ds[self.metadata].data[tuple(
                                ordered_slices.values())]))

                output_tile.tile.swath_tile.CopyFrom(tile)

                yield output_tile
    def process(self, tile, dataset, *args, **kwargs):
        """Summarize *tile*: bbox, value statistics, time range, and the
        variable's CF ``standard_name`` (when present), then return it."""
        tile_type = tile.tile.WhichOneof("tile_type")
        tile_data = getattr(tile.tile, tile_type)

        lats = numpy.ma.masked_invalid(from_shaped_array(tile_data.latitude))
        lons = numpy.ma.masked_invalid(from_shaped_array(tile_data.longitude))
        data = from_shaped_array(tile_data.variable_data)

        if tile.HasField("summary"):
            summary = tile.summary
        else:
            summary = nexusproto.TileSummary()

        summary.dataset_name = self._dataset_name
        summary.bbox.lat_min = numpy.nanmin(lats).item()
        summary.bbox.lat_max = numpy.nanmax(lats).item()
        summary.bbox.lon_min = numpy.nanmin(lons).item()
        summary.bbox.lon_max = numpy.nanmax(lons).item()
        summary.stats.min = numpy.nanmin(data).item()
        summary.stats.max = numpy.nanmax(data).item()
        summary.stats.count = data.size - numpy.count_nonzero(numpy.isnan(data))

        # In order to accurately calculate the average we need to weight the data based on the cosine of its latitude
        # This is handled slightly differently for swath vs. grid data
        cls = type(self)
        if tile_type == 'swath_tile':
            # Swath: data aligns element-for-element with lat/lon.
            summary.stats.mean = cls.calculate_mean_for_swath_tile(data, lats)
        elif tile_type == 'grid_tile':
            # Grid: each latitude weight is repeated across longitudes.
            # TODO This assumes data axis' are ordered as latitude x longitude
            summary.stats.mean = cls.calculate_mean_for_grid_tile(data, lats, lons)
        else:
            # Unknown layout: plain unweighted mean.
            summary.stats.mean = numpy.nanmean(data).item()

        try:
            min_time, max_time = find_time_min_max(tile_data)
            summary.stats.min_time = min_time
            summary.stats.max_time = max_time
        except NoTimeException:
            pass

        standard_name = dataset.variables[
            summary.data_var_name].attrs.get('standard_name')
        if standard_name:
            summary.standard_name = standard_name

        tile.summary.CopyFrom(summary)
        return tile
    def test_read_not_empty_avhrr(self):
        """An AVHRR 1x10x10 grid section reads 100 valid SST values."""
        granule = path.join(path.dirname(__file__), 'datafiles', 'not_empty_avhrr.nc4')

        reader = sdap.processors.GridReadingProcessor('analysed_sst', 'lat', 'lon', time='time')

        summary = nexusproto.TileSummary()
        summary.granule = "file:%s" % granule
        summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
        nexus_tile = nexusproto.NexusTile()
        nexus_tile.summary.CopyFrom(summary)

        results = list(reader.process(nexus_tile))

        self.assertEqual(1, len(results))

        for result in results:
            self.assertTrue(result.HasField('tile'))
            self.assertTrue(result.tile.HasField('grid_tile'))

            grid = result.tile.grid_tile
            self.assertEqual(10, from_shaped_array(grid.latitude).size)
            self.assertEqual(10, from_shaped_array(grid.longitude).size)
            self.assertEqual((1, 10, 10), from_shaped_array(grid.variable_data).shape)

        first_tile = results[0].tile.grid_tile
        data = np.ma.masked_invalid(from_shaped_array(first_tile.variable_data))
        self.assertEqual(100, np.ma.count(data))
        lats = np.ma.masked_invalid(from_shaped_array(first_tile.latitude))
        self.assertAlmostEqual(-39.875, np.ma.min(lats), places=3)
        self.assertAlmostEqual(-37.625, np.ma.max(lats), places=3)

        self.assertEqual(1462060800, first_tile.time)
        self.assertAlmostEqual(289.71, data[0, 0, 0], places=3)
    def test_read_not_empty_ccmp(self):
        """A CCMP grid section read with meta='vwnd' produces one metadata
        array and 3306 valid wind values."""
        granule = path.join(path.dirname(__file__), 'datafiles', 'not_empty_ccmp.nc')

        reader = sdap.processors.GridReadingProcessor('uwnd', 'latitude', 'longitude', time='time', meta='vwnd')

        summary = nexusproto.TileSummary()
        summary.granule = "file:%s" % granule
        summary.section_spec = "time:0:1,longitude:0:87,latitude:0:38"
        nexus_tile = nexusproto.NexusTile()
        nexus_tile.summary.CopyFrom(summary)

        results = list(reader.process(nexus_tile))

        self.assertEqual(1, len(results))

        for result in results:
            self.assertTrue(result.HasField('tile'))
            self.assertTrue(result.tile.HasField('grid_tile'))
            self.assertEqual(1, len(result.tile.grid_tile.meta_data))

            grid = result.tile.grid_tile
            self.assertEqual(38, from_shaped_array(grid.latitude).size)
            self.assertEqual(87, from_shaped_array(grid.longitude).size)
            self.assertEqual((1, 38, 87), from_shaped_array(grid.variable_data).shape)

        first_tile = results[0].tile.grid_tile
        data = np.ma.masked_invalid(from_shaped_array(first_tile.variable_data))
        self.assertEqual(3306, np.ma.count(data))
        lats = np.ma.masked_invalid(from_shaped_array(first_tile.latitude))
        self.assertAlmostEqual(-78.375, np.ma.min(lats), places=3)
        self.assertAlmostEqual(-69.125, np.ma.max(lats), places=3)

        self.assertEqual(1451606400, first_tile.time)
Пример #14
0
    def test_build_solr_doc_no_standard_name(self):
        """
        When TileSummary.standard_name isn't available, the solr field
        tile_var_name_s should use TileSummary.data_var_name
        """
        nexus_tile = nexusproto.NexusTile()
        nexus_tile.summary.tile_id = 'test_id'
        nexus_tile.summary.data_var_name = 'test_variable'
        nexus_tile.tile.ecco_tile.depth = 10.5

        doc = SolrStore()._build_solr_doc(nexus_tile)

        self.assertEqual('test_variable', doc['tile_var_name_s'])
Пример #15
0
    def test_process(self):
        """The tile id is the UUIDv3 of granule basename + variable + spec."""
        tile = nexusproto.NexusTile()
        tile.summary.granule = 'test_dir/test_granule.nc'
        tile.summary.data_var_name = 'test_variable'
        tile.summary.section_spec = 'i:0:90,j:0:90,k:8:9,nv:0:2,tile:4:5,time:8:9'

        # Only the file basename (not the directory) feeds the id.
        id_source = 'test_granule.nc' + 'test_variable' + 'i:0:90,j:0:90,k:8:9,nv:0:2,tile:4:5,time:8:9'
        expected_id = uuid.uuid3(uuid.NAMESPACE_DNS, id_source)

        result = GenerateTileId().process(tile)
        self.assertEqual(str(expected_id), result.summary.tile_id)
    def process(self, tile, dataset: xr.Dataset, *args, **kwargs):
        """Generate a tile from *dataset* according to the tile's section spec.

        Copies the input tile, records the variable name being read, and
        delegates the slicing to ``_generate_tile``.

        Raises:
            TileProcessingError: wraps any failure during spec parsing or
                tile generation, chaining the original exception as the cause.
        """
        try:
            dimensions_to_slices = self._convert_spec_to_slices(
                tile.summary.section_spec)

            output_tile = nexusproto.NexusTile()
            output_tile.CopyFrom(tile)
            output_tile.summary.data_var_name = self.variable

            return self._generate_tile(dataset, dimensions_to_slices,
                                       output_tile)
        except Exception as e:
            # Chain the cause so the original traceback isn't lost.
            raise TileProcessingError(
                f"Could not generate tiles from the granule because of the following error: {e}."
            ) from e
    def test_read_not_empty_ascatb_meta(self):
        """Reading with meta='wind_dir' attaches exactly one (1, 82)
        fully-populated metadata array to the swath tile."""
        granule = path.join(path.dirname(__file__), 'datafiles',
                            'not_empty_ascatb.nc4')

        reader = sdap.processors.SwathReadingProcessor('wind_speed',
                                                       'lat',
                                                       'lon',
                                                       time='time',
                                                       meta='wind_dir')

        summary = nexusproto.TileSummary()
        summary.granule = "file:%s" % granule
        summary.section_spec = "NUMROWS:0:1,NUMCELLS:0:82"
        nexus_tile = nexusproto.NexusTile()
        nexus_tile.summary.CopyFrom(summary)

        results = list(reader.process(nexus_tile))

        self.assertEqual(1, len(results))

        for result in results:
            self.assertTrue(result.HasField('tile'))
            self.assertTrue(result.tile.HasField('swath_tile'))
            self.assertLess(0, len(result.tile.swath_tile.meta_data))

        meta_list = results[0].tile.swath_tile.meta_data
        self.assertEqual(1, len(meta_list))
        meta_array = np.ma.masked_invalid(
            from_shaped_array(meta_list[0].meta_data))
        self.assertEqual((1, 82), meta_array.shape)
        self.assertEqual(82, np.ma.count(meta_array))
Пример #18
0
    def read_data(self, tile_specifications, file_path, output_tile):
        """Yield a NexusTile carrying a GridTile for each tile specification.

        Latitude/longitude are sliced by their own dimensions, the variable
        (and optional metadata) by the ordered spec, and a scalar tile time
        is set when a time variable is configured.
        """
        # Time is optional for Grid data
        time = self.environ['TIME']

        with xr.decode_cf(xr.open_dataset(file_path, decode_cf=False),
                          decode_times=False) as ds:
            for section_spec, dimtoslice in tile_specifications:
                tile = nexusproto.GridTile()

                # numpy.NaN was removed in NumPy 2.0; numpy.nan is the
                # canonical spelling and identical in older releases.
                tile.latitude.CopyFrom(
                    to_shaped_array(
                        numpy.ma.filled(
                            ds[self.latitude].data[dimtoslice[self.y_dim]],
                            numpy.nan)))
                tile.longitude.CopyFrom(
                    to_shaped_array(
                        numpy.ma.filled(
                            ds[self.longitude].data[dimtoslice[self.x_dim]],
                            numpy.nan)))
                # Before we read the data we need to make sure the dimensions are in the proper order so we don't have any
                #  indexing issues
                ordered_slices = get_ordered_slices(ds, self.variable_to_read,
                                                    dimtoslice)
                # Read data using the ordered slices, replacing masked values with NaN
                data_array = numpy.ma.filled(
                    ds[self.variable_to_read].data[tuple(
                        ordered_slices.values())], numpy.nan)

                tile.variable_data.CopyFrom(to_shaped_array(data_array))

                if self.metadata is not None:
                    tile.meta_data.add().CopyFrom(
                        to_metadata(
                            self.metadata, ds[self.metadata].data[tuple(
                                ordered_slices.values())]))

                if time is not None:
                    timevar = ds[time]
                    # Note assumption is that index of time is start value in dimtoslice
                    tile.time = to_seconds_from_epoch(
                        timevar.data[dimtoslice[time].start],
                        timeunits=timevar.attrs['units'],
                        timeoffset=self.time_offset)

                output_tile.tile.grid_tile.CopyFrom(tile)

                yield output_tile
Пример #19
0
    def test_build_solr_doc(self):
        """A fully populated TileSummary maps onto the expected solr fields."""
        nexus_tile = nexusproto.NexusTile()

        # Alias the embedded summary message and fill in every field.
        summary = nexus_tile.summary
        summary.tile_id = 'test_id'
        summary.dataset_name = 'test_dataset'
        summary.dataset_uuid = 'test_dataset_id'
        summary.data_var_name = 'test_variable'
        summary.granule = 'test_granule_path'
        summary.section_spec = 'time:0:1,j:0:20,i:200:240'
        summary.bbox.lat_min = -180.1
        summary.bbox.lat_max = 180.2
        summary.bbox.lon_min = -90.5
        summary.bbox.lon_max = 90.0
        summary.stats.min = -10.0
        summary.stats.max = 25.5
        summary.stats.mean = 12.5
        summary.stats.count = 100
        summary.stats.min_time = 694224000
        summary.stats.max_time = 694310400
        summary.standard_name = 'sea_surface_temperature'

        nexus_tile.tile.ecco_tile.depth = 10.5

        doc = SolrStore()._build_solr_doc(nexus_tile)

        self.assertEqual('sea_surface_temp', doc['table_s'])
        self.assertEqual(
            'POLYGON((-90.500 -180.100, 90.000 -180.100, 90.000 180.200, -90.500 180.200, -90.500 -180.100))',
            doc['geo'])
        self.assertEqual('test_id', doc['id'])
        self.assertEqual('test_dataset!test_id', doc['solr_id_s'])
        self.assertEqual('time:0:1,j:0:20,i:200:240', doc['sectionSpec_s'])
        self.assertEqual('test_granule_path', doc['granule_s'])
        self.assertEqual('sea_surface_temperature', doc['tile_var_name_s'])
        self.assertAlmostEqual(-90.5, doc['tile_min_lon'])
        self.assertAlmostEqual(90.0, doc['tile_max_lon'])
        self.assertAlmostEqual(-180.1, doc['tile_min_lat'], delta=1E-5)
        self.assertAlmostEqual(180.2, doc['tile_max_lat'], delta=1E-5)
        self.assertEqual('1992-01-01T00:00:00Z', doc['tile_min_time_dt'])
        self.assertEqual('1992-01-02T00:00:00Z', doc['tile_max_time_dt'])
        self.assertAlmostEqual(-10.0, doc['tile_min_val_d'])
        self.assertAlmostEqual(25.5, doc['tile_max_val_d'])
        self.assertAlmostEqual(12.5, doc['tile_avg_val_d'])
        self.assertEqual(100, doc['tile_count_i'])
        self.assertAlmostEqual(10.5, doc['tile_depth'])
    def test_read_not_empty_wswm(self):
        """_generate_tile yields a full-length time-series tile for one reach."""
        reader = TimeSeriesReadingProcessor('Qout', 'lat', 'lon', time='time')
        granule = path.join(path.dirname(__file__), '../granules/not_empty_wswm.nc')

        nexus_tile = nexusproto.NexusTile()
        nexus_tile.summary.granule = granule

        slices = {
            'time': slice(0, 5832),
            'rivid': slice(0, 1),
        }
        with xr.open_dataset(granule) as ds:
            result = reader._generate_tile(ds, slices, nexus_tile)

            self.assertEqual(granule, result.summary.granule, granule)
            ts_tile = result.tile.time_series_tile
            self.assertEqual([5832], ts_tile.time.shape)
            self.assertEqual([5832, 1], ts_tile.variable_data.shape)
            self.assertEqual([1], ts_tile.latitude.shape)
            self.assertEqual([1], ts_tile.longitude.shape)
    def _generate_tile(self, ds: xr.Dataset,
                       dimensions_to_slices: Dict[str, slice], input_tile):
        """Build a GridTile from *ds* using the given dimension slices.

        Copies lat/lon/variable subsets (masked values -> NaN, singleton
        dimensions squeezed) into a new GridTile, resolves optional scalar
        depth and time (both slices must have length 1), and attaches the
        tile to *input_tile*.

        Raises:
            RuntimeError: if the depth or time slice spans more than one index.
        """
        new_tile = nexusproto.GridTile()

        lat_subset = ds[self.latitude][type(self)._slices_for_variable(
            ds[self.latitude], dimensions_to_slices)]
        lon_subset = ds[self.longitude][type(self)._slices_for_variable(
            ds[self.longitude], dimensions_to_slices)]
        # np.NaN was removed in NumPy 2.0; np.nan is the canonical spelling
        # and identical in older releases.
        lat_subset = np.ma.filled(np.squeeze(lat_subset), np.nan)
        lon_subset = np.ma.filled(np.squeeze(lon_subset), np.nan)

        data_subset = ds[self.variable][type(self)._slices_for_variable(
            ds[self.variable], dimensions_to_slices)]
        data_subset = np.ma.filled(np.squeeze(data_subset), np.nan)

        if self.depth:
            depth_dim, depth_slice = list(
                type(self)._slices_for_variable(
                    ds[self.depth], dimensions_to_slices).items())[0]
            depth_slice_len = depth_slice.stop - depth_slice.start
            if depth_slice_len > 1:
                raise RuntimeError(
                    "Depth slices must have length 1, but '{dim}' has length {dim_len}."
                    .format(dim=depth_dim, dim_len=depth_slice_len))
            new_tile.depth = ds[self.depth][depth_slice].item()

        if self.time:
            time_slice = dimensions_to_slices[self.time]
            time_slice_len = time_slice.stop - time_slice.start
            if time_slice_len > 1:
                raise RuntimeError(
                    "Time slices must have length 1, but '{dim}' has length {dim_len}."
                    .format(dim=self.time, dim_len=time_slice_len))
            # Divides by 1e9, i.e. assumes the decoded time is nanoseconds
            # since epoch (datetime64[ns]) -- TODO confirm for all granules.
            new_tile.time = int(ds[self.time][time_slice.start].item() / 1e9)

        new_tile.latitude.CopyFrom(to_shaped_array(lat_subset))
        new_tile.longitude.CopyFrom(to_shaped_array(lon_subset))
        new_tile.variable_data.CopyFrom(to_shaped_array(data_subset))

        input_tile.tile.grid_tile.CopyFrom(new_tile)
        return input_tile
Пример #22
0
    def read_tile(self):
        """Read a single-time 30x30 slab of band B03 from the HLS test
        granule and return the generated tile."""
        reader = GridReadingProcessor('B03',
                                      'lat',
                                      'lon',
                                      time='time')
        granule = path.join(
            path.dirname(__file__),
            '../granules/HLS.S30.T11SPC.2020001.v1.4.hdf.nc')

        nexus_tile = nexusproto.NexusTile()
        nexus_tile.summary.granule = granule

        slices = {
            'time': slice(0, 1),
            'lat': slice(0, 30),
            'lon': slice(0, 30)
        }

        with xr.open_dataset(granule) as ds:
            return reader._generate_tile(ds, slices, nexus_tile)
Пример #23
0
def to_metadata(name, data_array):
    """Wrap *data_array* in a nexusproto MetaData message labeled *name*."""
    meta = nexusproto.MetaData()
    meta.name = name
    meta.meta_data.CopyFrom(to_shaped_array(data_array))
    return meta