Example #1
    def test_convert(self):
        coords = [(0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 0.0)]
        expected = Polygon(coords)
        actual = PolygonLike.convert(coords)
        self.assertEqual(expected, actual)

        with self.assertRaises(ValueError) as err:
            PolygonLike.convert('aaa')
        self.assertEqual('cannot convert geometry to a valid Polygon: aaa',
                         str(err.exception))
        self.assertEqual(None, PolygonLike.convert(None))
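
Taken together, this and the later examples suggest the kinds of values PolygonLike.convert accepts. The sketch below is illustrative only; the import path is assumed and the exact return type depends on the installed version.

from cate.core.types import PolygonLike  # assumed import path

# a list of (lon, lat) coordinate tuples
p1 = PolygonLike.convert([(0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 0.0)])
# a bounding box given as a "minx,miny,maxx,maxy" string or a flat list
p2 = PolygonLike.convert("-180,-90,180,90")
p3 = PolygonLike.convert([10.4, 20.2, 30.8, 40.8])
# a WKT string
p4 = PolygonLike.convert('POLYGON ((10.4 20.2, 30.8 20.2, 30.8 40.8, 10.4 40.8, 10.4 20.2))')
# None and the empty string convert to None
assert PolygonLike.convert(None) is None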
Example #2
    def test_load_old_datasource_from_json_dict(self):
        test_data = {
            'name':
            'local.test_name',
            'meta_data': {
                'temporal_coverage': "2001-01-01 00:00:00,2001-01-31 23:59:59",
                'spatial_coverage': "-180,-90,180,90",
                'variables': ['var_test_1', 'var_test_2'],
            },
            "meta_info": {},
            'files': [['file_1', '2002-02-01 00:00:00', '2002-02-01 23:59:59'],
                      ['file_2', '2002-03-01 00:00:00', '2002-03-01 23:59:59']]
        }
        data_source = LocalDataSource.from_json_dict(
            json_dict=test_data, data_store=self.data_store)
        self.assertIsNotNone(data_source)
        self.assertEqual(
            data_source.temporal_coverage(),
            TimeRangeLike.convert(
                test_data.get('meta_data').get('temporal_coverage')))
        self.assertEqual(
            data_source.spatial_coverage(),
            PolygonLike.convert(
                test_data.get('meta_data').get('spatial_coverage')))
        self.assertListEqual(
            [var.get('name') for var in data_source.variables_info],
            test_data.get('meta_data').get('variables'))

        test_data = {
            'name':
            'local.test_name',
            'meta_data': {
                'temporal_covrage': "2001-01-01 00:00:00,2001-01-31 23:59:59",
                'spatial_coverage': "-180,-90,180,90",
                'variables': ['var_test_1', 'var_test_2'],
            },
            "meta_info": {},
            'files': [['file_1', '2002-02-01 00:00:00', '2002-02-01 23:59:59'],
                      ['file_2', '2002-03-01 00:00:00', '2002-03-01 23:59:59']]
        }
        data_source = LocalDataSource.from_json_dict(
            json_dict=test_data, data_store=self.data_store)
        self.assertIsNotNone(data_source)
        self.assertEqual(
            data_source.temporal_coverage(),
            TimeRangeLike.convert(
                test_data.get('meta_data').get('temporal_covrage')))
        self.assertEqual(
            data_source.spatial_coverage(),
            PolygonLike.convert(
                test_data.get('meta_data').get('spatial_coverage')))
        self.assertListEqual(
            [var.get('name') for var in data_source.variables_info],
            test_data.get('meta_data').get('variables'))
Example #3
    def test_make_local(self):
        data_source = self._local_data_store.query('local_w_temporal')[0]

        with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]):
            new_ds_title = 'from_local_to_local'
            new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                      datetime.datetime(1978, 11, 15, 23, 59)))
            new_ds = data_source.make_local(new_ds_title, time_range=new_ds_time_range)
            self.assertIsNotNone(new_ds)

            self.assertEqual(new_ds.id, "local.%s" % new_ds_title)
            self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert(
                (datetime.datetime(1978, 11, 14, 0, 0),
                 datetime.datetime(1978, 11, 15, 23, 59))))

            new_ds_2_title = 'from_local_to_local_var'
            new_ds_2_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                         datetime.datetime(1978, 11, 15, 23, 59)))
            new_ds_2_vars = VarNamesLike.convert(['sm'])

            new_ds_w_one_variable = data_source.make_local(new_ds_2_title,
                                                           time_range=new_ds_2_time_range,
                                                           var_names=new_ds_2_vars)
            self.assertIsNotNone(new_ds_w_one_variable)
            self.assertEqual(new_ds_w_one_variable.id, "local.%s" % new_ds_2_title)
            data_set = new_ds_w_one_variable.open_dataset()
            self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'})

            new_ds_3_title = 'from_local_to_local_range'
            new_ds_3_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                         datetime.datetime(1978, 11, 15, 23, 59)))
            new_ds_3_vars = VarNamesLike.convert(['sm'])
            new_ds_3_region = PolygonLike.convert("10,10,20,20")

            new_ds_w_region = data_source.make_local(new_ds_3_title,
                                                     time_range=new_ds_3_time_range,
                                                     var_names=new_ds_3_vars,
                                                     region=new_ds_3_region)  # type: LocalDataSource
            self.assertIsNotNone(new_ds_w_region)
            self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_3_title)
            self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20"))
            data_set = new_ds_w_region.open_dataset()
            self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'})

            no_data = data_source.make_local('no_data',
                                             time_range=(datetime.datetime(2020, 11, 14, 0, 0),
                                                         datetime.datetime(2020, 11, 15, 23, 59)))
            self.assertIsNone(no_data)
Example #4
def data_frame_subset(gdf: gpd.GeoDataFrame,
                      region_op: str = 'intersects',
                      region: PolygonLike.TYPE = None,
                      var_names: VarNamesLike.TYPE = None) -> gpd.GeoDataFrame:
    """
    Create a GeoDataFrame subset from given variables (data frame columns) and/or region.

    :param gdf: A GeoDataFrame.
    :param region_op: The geometric operation to be performed if *region* is given.
    :param region: A region polygon used to filter rows.
    :param var_names: The variables (columns) to select.
    :return: A GeoDataFrame subset.
    """

    region = PolygonLike.convert(region)

    var_names = VarNamesLike.convert(var_names)

    if not var_names and not region:
        return gdf

    if var_names:
        if 'geometry' not in var_names:
            var_names = ['geometry'] + var_names
        gdf = gdf[var_names]

    if region and region_op:
        geom_str = PolygonLike.format(region)
        gdf = data_frame_query(gdf, f'@{region_op}("{geom_str}")')

    return gdf
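
A minimal usage sketch for data_frame_subset; the shapefile path and the 'population' column are hypothetical, and gdf can be any GeoDataFrame:

import geopandas as gpd

gdf = gpd.read_file('countries.shp')  # hypothetical input file
# keep only the 'population' column (plus 'geometry') and the rows whose
# geometry intersects the given bounding box
subset = data_frame_subset(gdf,
                           region_op='intersects',
                           region='10,10,20,20',
                           var_names=['population'])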
Example #5
def anomaly_internal(ds: xr.Dataset,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Calculate anomaly using as reference data the mean of an optional region
    and time slice from the given dataset. If no time slice/spatial region is
    given, the operation will calculate anomaly using the mean of the whole
    dataset as the reference.

    This is done for each data array in the dataset.

    :param ds: The dataset to calculate anomalies from
    :param time_range: Time range to use for reference data
    :param region: Spatial region to use for reference data
    :param monitor: a progress monitor.
    :return: The anomaly dataset
    """
    ref = ds.copy()
    if time_range:
        time_range = TimeRangeLike.convert(time_range)
        ref = subset_temporal(ref, time_range)
    if region:
        region = PolygonLike.convert(region)
        ref = subset_spatial(ref, region)
    with monitor.observing("Calculating anomaly"):
        ref = ref.mean(keep_attrs=True, skipna=True)
        diff = ds - ref
    return diff
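
A sketch of how anomaly_internal might be called, assuming ds is an already opened xarray Dataset with lat/lon/time coordinates; the time range and region values are examples only:

import datetime

anomalies = anomaly_internal(ds,
                             time_range=(datetime.datetime(2001, 1, 1),
                                         datetime.datetime(2001, 12, 31)),
                             region='-180,-90,180,90')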
Example #6
    def open_dataset(self,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     var_names: VarNamesLike.TYPE = None,
                     protocol: str = None) -> Any:
        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        selected_file_list = self._find_files(time_range)
        if not selected_file_list:
            msg = 'Open Data Portal\'s data source \'{}\' does not seem to have any data sets'.format(self.id)
            if time_range is not None:
                msg += ' in given time range {}'.format(TimeRangeLike.format(time_range))
            raise DataAccessError(None, msg)

        files = self._get_urls_list(selected_file_list, _ODP_PROTOCOL_OPENDAP)
        try:
            ds = open_xarray_dataset(files)
            if region:
                ds = subset_spatial_impl(ds, region)
            if var_names:
                ds = ds.drop([var_name for var_name in ds.data_vars.keys() if var_name not in var_names])
            return ds

        except OSError as e:
            if time_range:
                raise DataAccessError(self, "Cannot open remote dataset for time range: {}\n"
                                            "Error details: {}"
                                      .format(TimeRangeLike.format(time_range), e))
            else:
                raise DataAccessError(self, "Cannot open remote dataset\n"
                                            "Error details: {}"
                                      .format(e))
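
A sketch of how such a data source might be opened, where data_source stands for any DataSource instance obtained from a data store query (as in the test examples above); the parameter values are illustrative:

import datetime

ds = data_source.open_dataset(
    time_range=(datetime.datetime(1978, 11, 14, 0, 0),
                datetime.datetime(1978, 11, 15, 23, 59)),
    region='10,10,20,20',
    var_names=['sm'])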
Example #7
 def open_dataset(self,
                  time_range: TimeRangeLike.TYPE = None,
                  region: PolygonLike.TYPE = None,
                  var_names: VarNamesLike.TYPE = None,
                  protocol: str = None) -> Any:
     time_range = TimeRangeLike.convert(time_range) if time_range else None
     if region:
         region = PolygonLike.convert(region)
     if var_names:
         var_names = VarNamesLike.convert(var_names)
     paths = []
     if time_range:
         time_series = list(self._files.values())
         file_paths = list(self._files.keys())
         for i in range(len(time_series)):
             if time_series[i]:
                 if isinstance(time_series[i], Tuple) and \
                         time_series[i][0] >= time_range[0] and \
                         time_series[i][1] <= time_range[1]:
                     paths.extend(self._resolve_file_path(file_paths[i]))
                 elif isinstance(
                         time_series[i], datetime
                 ) and time_range[0] <= time_series[i] < time_range[1]:
                     paths.extend(self._resolve_file_path(file_paths[i]))
     else:
         for file in self._files.items():
             paths.extend(self._resolve_file_path(file[0]))
     if paths:
         paths = sorted(set(paths))
         try:
             ds = open_xarray_dataset(paths)
             if region:
                 ds = normalize_impl(ds)
                 ds = subset_spatial_impl(ds, region)
             if var_names:
                 ds = ds.drop([
                     var_name for var_name in ds.data_vars.keys()
                     if var_name not in var_names
                 ])
             return ds
         except OSError as e:
             if time_range:
                 raise DataAccessError(
                     "Cannot open local dataset for time range {}:\n"
                     "{}".format(TimeRangeLike.format(time_range), e),
                     source=self) from e
             else:
                 raise DataAccessError("Cannot open local dataset:\n"
                                       "{}".format(e),
                                       source=self) from e
     else:
         if time_range:
             raise DataAccessError(
                 "No local datasets available for\nspecified time range {}".
                 format(TimeRangeLike.format(time_range)),
                 source=self)
         else:
             raise DataAccessError("No local datasets available",
                                   source=self)
Example #8
 def test_format(self):
     self.assertEqual(PolygonLike.format(None), '')
     coords = [(10.4, 20.2), (30.8, 20.2), (30.8, 40.8), (10.4, 40.8)]
     pol = PolygonLike.convert(coords)
     self.assertEqual(
         PolygonLike.format(pol),
         'POLYGON ((10.4 20.2, 30.8 20.2, 30.8 40.8, 10.4 40.8, 10.4 20.2))'
     )
Example #9
 def spatial_coverage(self):
     if not self._spatial_coverage and \
             set(self._meta_info.keys()).issuperset({'bbox_minx', 'bbox_miny', 'bbox_maxx', 'bbox_maxy'}):
         self._spatial_coverage = PolygonLike.convert(",".join([
             self._meta_info.get('bbox_minx'),
             self._meta_info.get('bbox_miny'),
             self._meta_info.get('bbox_maxx'),
             self._meta_info.get('bbox_maxy')])
         )
     return self._spatial_coverage
Example #10
 def spatial_coverage(self):
     if not self._spatial_coverage and \
             set(self._meta_info.keys()).issuperset({'bbox_minx', 'bbox_miny', 'bbox_maxx', 'bbox_maxy'}):
         self._spatial_coverage = PolygonLike.convert(",".join([
             self._meta_info.get('bbox_minx'),
             self._meta_info.get('bbox_miny'),
             self._meta_info.get('bbox_maxx'),
             self._meta_info.get('bbox_maxy')
         ]))
     return self._spatial_coverage
Example #11
    def test_make_local(self):
        data_source = self._local_data_store.query('local_w_temporal')[0]

        with unittest.mock.patch.object(EsaCciOdpDataStore,
                                        'query',
                                        return_value=[]):
            new_ds = data_source.make_local(
                'from_local_to_local', None,
                (datetime.datetime(1978, 11, 14, 0, 0),
                 datetime.datetime(1978, 11, 15, 23, 59)))
            self.assertEqual(new_ds.name, 'local.from_local_to_local')
            self.assertEqual(
                new_ds.temporal_coverage(),
                TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                       datetime.datetime(1978, 11, 15, 23,
                                                         59))))

            data_source.update_local(new_ds.name,
                                     (datetime.datetime(1978, 11, 15, 00, 00),
                                      datetime.datetime(1978, 11, 16, 23, 59)))
            self.assertEqual(
                new_ds.temporal_coverage(),
                TimeRangeLike.convert((datetime.datetime(1978, 11, 15, 0, 0),
                                       datetime.datetime(1978, 11, 16, 23,
                                                         59))))

            with self.assertRaises(ValueError) as context:
                data_source.update_local(
                    "wrong_ds_name", (datetime.datetime(1978, 11, 15, 00, 00),
                                      datetime.datetime(1978, 11, 16, 23, 59)))
            self.assertTrue("Couldn't find local DataSource",
                            context.exception.args[0])

            new_ds_w_one_variable = data_source.make_local(
                'from_local_to_local_var', None,
                (datetime.datetime(1978, 11, 14, 0, 0),
                 datetime.datetime(1978, 11, 15, 23, 59)), None, ['sm'])
            self.assertEqual(new_ds_w_one_variable.name,
                             'local.from_local_to_local_var')
            data_set = new_ds_w_one_variable.open_dataset()
            self.assertSetEqual(set(data_set.variables),
                                {'sm', 'lat', 'lon', 'time'})

            new_ds_w_region = data_source.make_local(
                'from_local_to_local_region', None,
                (datetime.datetime(1978, 11, 14, 0, 0),
                 datetime.datetime(1978, 11, 15, 23, 59)), "10,10,20,20",
                ['sm'])  # type: LocalDataSource
            self.assertEqual(new_ds_w_region.name,
                             'local.from_local_to_local_region')
            self.assertEqual(new_ds_w_region.spatial_coverage(),
                             PolygonLike.convert("10,10,20,20"))
            data_set = new_ds_w_region.open_dataset()
            self.assertSetEqual(set(data_set.variables),
                                {'sm', 'lat', 'lon', 'time'})
Example #12
 def spatial_coverage(self) -> Optional[PolygonLike]:
     if self._catalogue_data \
             and self._catalogue_data.get('bbox_minx', None) and self._catalogue_data.get('bbox_miny', None) \
             and self._catalogue_data.get('bbox_maxx', None) and self._catalogue_data.get('bbox_maxy', None):
         return PolygonLike.convert([
             self._catalogue_data.get('bbox_minx'),
             self._catalogue_data.get('bbox_miny'),
             self._catalogue_data.get('bbox_maxx'),
             self._catalogue_data.get('bbox_maxy')
         ])
     return None
Example #13
    def make_local(self,
                   local_name: str,
                   local_id: str = None,
                   time_range: TimeRangeLike.TYPE = None,
                   region: PolygonLike.TYPE = None,
                   var_names: VarNamesLike.TYPE = None,
                   monitor: Monitor = Monitor.NONE) -> Optional[DataSource]:

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            add_to_data_store_registry()
            local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            raise ValueError('Cannot initialize `local` DataStore')

        _uuid = LocalDataStore.generate_uuid(ref_id=self.id, time_range=time_range, region=region, var_names=var_names)

        if not local_name or len(local_name) == 0:
            local_name = "local.{}.{}".format(self.id, _uuid)
            existing_ds_list = local_store.query(ds_id=local_name)
            if len(existing_ds_list) == 1:
                return existing_ds_list[0]
        else:
            existing_ds_list = local_store.query(ds_id='local.%s' % local_name)
            if len(existing_ds_list) == 1:
                if existing_ds_list[0].meta_info.get('uuid', None) == _uuid:
                    return existing_ds_list[0]
                else:
                    raise ValueError('Datastore {} already contains dataset {}'.format(local_store.id, local_name))

        local_meta_info = self.meta_info.copy()
        local_meta_info['ref_uuid'] = local_meta_info.get('uuid', None)
        local_meta_info['uuid'] = _uuid

        local_ds = local_store.create_data_source(local_name, region, local_name,
                                                  time_range=time_range, var_names=var_names,
                                                  meta_info=self.meta_info.copy())
        if local_ds:
            if not local_ds.is_complete:
                self._make_local(local_ds, time_range, region, var_names, monitor=monitor)

            if local_ds.is_empty:
                local_store.remove_data_source(local_ds)
                return None

            local_store.register_ds(local_ds)
            return local_ds
        return None
Example #14
def subset_spatial(ds: xr.Dataset,
                   region: PolygonLike.TYPE,
                   mask: bool = True) -> xr.Dataset:
    """
    Do a spatial subset of the dataset

    :param ds: Dataset to subset
    :param region: Spatial region to subset
    :param mask: Whether to mask with NaN the values that fall within the polygon's bounding box but outside the polygon itself.
    :return: Subset dataset
    """
    region = PolygonLike.convert(region)
    return adjust_spatial_attrs(subset_spatial_impl(ds, region, mask))
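
A minimal sketch of subset_spatial in use, assuming ds is an already opened xarray Dataset with lat/lon coordinates; the bounding box is just an example:

# cut out a lon/lat box and mask cells inside the box but outside the polygon with NaN
ds_region = subset_spatial(ds, region='10,10,20,20', mask=True)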
Example #15
 def test_load_datasource_from_json_dict(self):
     test_data = {
         'name':
         'local.test_name2',
         "meta_info": {
             "temporal_coverage_start":
             "2001-01-01T00:00:00",
             "temporal_coverage_end":
             "2001-01-31T23:59:59",
             "bbox_maxx":
             "180.0",
             "bbox_maxy":
             "90.0",
             "bbox_minx":
             "-180.0",
             "bbox_miny":
             "-90.0",
             "variables": [{
                 "name": "var_1",
                 "units": "kelvin",
                 "long_name": "var_1 long name..",
                 "standard_name": "std_var_1"
             }, {
                 "name": "var_2",
                 "units": "celsius",
                 "long_name": "var_2 long name..",
                 "standard_name": "std_var_2"
             }]
         },
         'files': [['file_1', '2002-02-01 00:00:00', '2002-02-01 23:59:59'],
                   ['file_2', '2002-03-01 00:00:00', '2002-03-01 23:59:59']]
     }
     data_source = LocalDataSource.from_json_dict(
         json_dict=test_data, data_store=self.data_store)
     self.assertIsNotNone(data_source)
     self.assertEqual(
         data_source.temporal_coverage(),
         TimeRangeLike.convert("{},{}".format(
             test_data.get('meta_info').get('temporal_coverage_start'),
             test_data.get('meta_info').get('temporal_coverage_end'))))
     self.assertEqual(
         data_source.spatial_coverage(),
         PolygonLike.convert(",".join([
             test_data.get('meta_info').get('bbox_minx'),
             test_data.get('meta_info').get('bbox_miny'),
             test_data.get('meta_info').get('bbox_maxx'),
             test_data.get('meta_info').get('bbox_maxy'),
         ])))
     self.assertListEqual(data_source.variables_info,
                          test_data.get('meta_info').get('variables'))
Example #16
def _crosses_antimeridian(region: PolygonLike.TYPE) -> bool:
    """
    Determine if the given region crosses the Antimeridian line, by converting
    the given Polygon from -180;180 to 0;360 and checking if the antimeridian
    line crosses it.

    This only works with Polygons without holes.

    :param region: PolygonLike to test
    """
    region = PolygonLike.convert(region)

    # Retrieving the points of the Polygon is a bit troublesome; parsing WKT
    # is more straightforward and probably faster
    new_wkt = 'POLYGON (('

    # [10:-2] gets rid of POLYGON (( and ))
    for point in dumps(region)[10:-2].split(','):
        point = point.strip()
        lon, lat = point.split(' ')
        lon = float(lon)
        if -180 <= lon < 0:
            lon += 360
        new_wkt += '{} {}, '.format(lon, lat)
    new_wkt = new_wkt[:-2] + '))'

    converted = loads(new_wkt)

    # There's a problem at this point. Any polygon crossed by the zeroth
    # meridian can in principle convert to an inverted polygon that is crossed
    # by the antimeridian.

    if not converted.is_valid:
        # The polygon 'became' invalid upon conversion => probably the original
        # polygon is what we want
        return False

    test_line = LineString([(180, -90), (180, 90)])
    if test_line.crosses(converted):
        # The converted polygon seems to be valid and crossed by the
        # antimeridian. At this point there's no 'perfect' way how to tell if
        # we wanted the converted polygon or the original one.

        # A simple heuristic is to check for size. The smaller one is quite
        # likely the desired one
        return converted.area < region.area

    return False
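
A sketch of the intended behaviour, using explicit (lon, lat) coordinates; the polygons below are illustrative only:

crossing = [(170.0, -10.0), (-170.0, -10.0), (-170.0, 10.0), (170.0, 10.0)]
not_crossing = [(0.0, -10.0), (20.0, -10.0), (20.0, 10.0), (0.0, 10.0)]
# the first polygon spans the antimeridian, the second does not
print(_crosses_antimeridian(crossing))      # expected: True
print(_crosses_antimeridian(not_crossing))  # expected: False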
Example #17
    def __init__(self,
                 ds_id: str,
                 files: Union[Sequence[str], OrderedDict],
                 data_store: 'LocalDataStore',
                 temporal_coverage: TimeRangeLike.TYPE = None,
                 spatial_coverage: PolygonLike.TYPE = None,
                 variables: VarNamesLike.TYPE = None,
                 meta_info: dict = None,
                 status: DataSourceStatus = None):
        self._id = ds_id
        if isinstance(files, Sequence):
            self._files = OrderedDict.fromkeys(files)
        else:
            self._files = files
        self._data_store = data_store

        initial_temporal_coverage = TimeRangeLike.convert(
            temporal_coverage) if temporal_coverage else None
        if not initial_temporal_coverage:
            files_number = len(self._files.items())
            if files_number > 0:
                files_range = list(self._files.values())
                if files_range:
                    if isinstance(files_range[0], Tuple):
                        initial_temporal_coverage = TimeRangeLike.convert(
                            tuple([
                                files_range[0][0],
                                files_range[files_number - 1][1]
                            ]))
                    elif isinstance(files_range[0], datetime):
                        initial_temporal_coverage = TimeRangeLike.convert(
                            (files_range[0], files_range[files_number - 1]))

        self._temporal_coverage = initial_temporal_coverage
        self._spatial_coverage = PolygonLike.convert(
            spatial_coverage) if spatial_coverage else None
        self._variables = VarNamesLike.convert(variables) if variables else []

        self._meta_info = meta_info if meta_info else OrderedDict()

        if self._variables and not self._meta_info.get('variables', None):
            self._meta_info['variables'] = [{
                'name': var_name,
                'units': '',
                'long_name': '',
                'standard_name': ''
            } for var_name in self._variables]

        self._status = status if status else DataSourceStatus.READY
Example #18
def _generic_index_calculation(
        ds: xr.Dataset,
        var: VarName.TYPE,
        region: PolygonLike.TYPE,
        window: int,
        file: str,
        name: str,
        threshold: float = None,
        monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    A generic index calculation, where an index is defined as an anomaly,
    relative to the given reference, of a moving average with the given window
    size of the given region of the given variable of the given dataset.

    :param ds: Dataset from which to calculate the index
    :param var: Variable from which to calculate index
    :param region: Spatial subset from which to calculate the index
    :param window: Window size for the moving average
    :param file: Path to the reference file
    :param threshold: Absolute threshold that indicates an ENSO event
    :param name: Name of the index
    :param monitor: a progress monitor.
    :return: A dataset that contains the index timeseries
    """
    var = VarName.convert(var)
    region = PolygonLike.convert(region)

    with monitor.starting("Calculate the index", total_work=2):
        ds = select_var(ds, var)
        ds_subset = subset_spatial(ds, region)
        anom = anomaly_external(ds_subset, file, monitor=monitor.child(1))
        with monitor.child(1).observing("Calculate mean"):
            ts = anom.mean(dim=['lat', 'lon'])
        df = pd.DataFrame(data=ts[var].values,
                          columns=[name],
                          index=ts.time.values)
        retval = df.rolling(window=window, center=True).mean().dropna()

    if threshold is None:
        return retval

    retval['El Nino'] = pd.Series((retval[name] > threshold),
                                  index=retval.index)
    retval['La Nina'] = pd.Series((retval[name] < -threshold),
                                  index=retval.index)
    return retval
Example #19
 def open_dataset(self,
                  time_range: TimeRangeLike.TYPE = None,
                  region: PolygonLike.TYPE = None,
                  var_names: VarNamesLike.TYPE = None,
                  protocol: str = None) -> Any:
     time_range = TimeRangeLike.convert(time_range) if time_range else None
     if region:
         region = PolygonLike.convert(region)
     if var_names:
         var_names = VarNamesLike.convert(var_names)
     paths = []
     if time_range:
         time_series = list(self._files.values())
         file_paths = list(self._files.keys())
         for i in range(len(time_series)):
             if time_series[i]:
                 if isinstance(time_series[i], Tuple) and \
                         time_series[i][0] >= time_range[0] and \
                         time_series[i][1] <= time_range[1]:
                     paths.extend(self._resolve_file_path(file_paths[i]))
                 elif isinstance(time_series[i], datetime) and \
                         time_range[0] <= time_series[i] < time_range[1]:
                     paths.extend(self._resolve_file_path(file_paths[i]))
     else:
         for file in self._files.items():
             paths.extend(self._resolve_file_path(file[0]))
     if paths:
         paths = sorted(set(paths))
         try:
             ds = open_xarray_dataset(paths)
             if region:
                  [lon_min, lat_min, lon_max, lat_max] = region.bounds
                 ds = ds.sel(drop=False,
                             lat=slice(lat_min, lat_max),
                             lon=slice(lon_min, lon_max))
             if var_names:
                 ds = ds.drop([
                     var_name for var_name in ds.variables.keys()
                     if var_name not in var_names
                 ])
             return ds
         except OSError as e:
             raise IOError("Files: {} caused:\nOSError({}): {}".format(
                 paths, e.errno, e.strerror))
     else:
         return None
Example #20
    def test_convert(self):
        self.assertEqual(PolygonLike.convert(None), None)
        self.assertEqual(PolygonLike.convert(''), None)
        coords = [(10.4, 20.2), (30.8, 20.2), (30.8, 40.8), (10.4, 40.8)]
        self.assertTrue(PolygonLike.convert(coords).equals(Polygon(coords)))
        self.assertTrue(PolygonLike.convert([10.4, 20.2, 30.8, 40.8]).equals(Polygon(coords)))

        with self.assertRaises(ValidationError) as err:
            PolygonLike.convert('aaa')
        self.assertEqual(str(err.exception),
                         "Value cannot be converted into a 'PolygonLike': "
                         "Invalid geometry WKT format.")
Example #21
    def test_convert(self):
        self.assertEqual(PolygonLike.convert(None), None)
        self.assertEqual(PolygonLike.convert(''), None)
        coords = [(10.4, 20.2), (30.8, 20.2), (30.8, 40.8), (10.4, 40.8)]
        self.assertTrue(PolygonLike.convert(coords).equals(Polygon(coords)))
        self.assertTrue(PolygonLike.convert([10.4, 20.2, 30.8, 40.8])
                        .equals(Polygon(coords)))

        with self.assertRaises(ValueError) as err:
            PolygonLike.convert('aaa')
        self.assertEqual(str(err.exception),
                         'cannot convert value <aaa> to PolygonLike')
Example #22
    def __init__(self,
                 ds_id: str,
                 files: Union[Sequence[str], OrderedDict],
                 data_store: 'LocalDataStore',
                 temporal_coverage: TimeRangeLike.TYPE = None,
                 spatial_coverage: PolygonLike.TYPE = None,
                 variables: VarNamesLike.TYPE = None,
                 meta_info: dict = None,
                 status: DataSourceStatus = None):
        self._id = ds_id
        if isinstance(files, Sequence):
            self._files = OrderedDict.fromkeys(files)
        else:
            self._files = files
        self._data_store = data_store

        initial_temporal_coverage = TimeRangeLike.convert(temporal_coverage) if temporal_coverage else None
        if not initial_temporal_coverage:
            files_number = len(self._files.items())
            if files_number > 0:
                files_range = list(self._files.values())
                if files_range:
                    if isinstance(files_range[0], Tuple):
                        initial_temporal_coverage = TimeRangeLike.convert(tuple([files_range[0][0],
                                                                                 files_range[files_number - 1][1]]))
                    elif isinstance(files_range[0], datetime):
                        initial_temporal_coverage = TimeRangeLike.convert((files_range[0],
                                                                           files_range[files_number - 1]))

        self._temporal_coverage = initial_temporal_coverage
        self._spatial_coverage = PolygonLike.convert(spatial_coverage) if spatial_coverage else None
        self._variables = VarNamesLike.convert(variables) if variables else []

        self._meta_info = meta_info if meta_info else OrderedDict()

        if self._variables and not self._meta_info.get('variables', None):
            self._meta_info['variables'] = [
                {'name': var_name,
                 'units': '',
                 'long_name': '',
                 'standard_name': ''
                 } for var_name in self._variables]

        self._status = status if status else DataSourceStatus.READY
Example #23
def enso(ds: xr.Dataset,
         var: VarName.TYPE,
         file: str,
         region: str = 'n34',
         custom_region: PolygonLike.TYPE = None,
         threshold: float = None,
         monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    Calculate ENSO index, which is defined as a five month running mean of
    anomalies of monthly means of SST data in the given region.

    :param ds: A monthly SST dataset
    :param file: Path to the reference data file, e.g. a climatology. A suitable
    reference dataset can be generated using the long_term_average operation
    :param var: Dataset variable to use for index calculation
    :param region: Region for index calculation, the default is Nino3.4
    :param custom_region: If 'custom' is chosen as the 'region', this parameter
    has to be provided to set the desired region.
    :param threshold: If given, a boolean El Nino/La Nina timeseries will be
    calculated and added to the output dataset, according to the given
    threshold. An anomaly larger than the positive value of the threshold
    indicates El Nino and an anomaly smaller than the negative of the given
    threshold indicates La Nina.
    :param monitor: a progress monitor.
    :return: A dataset that contains the index timeseries.
    """
    regions = {
        'N1+2': '-90, -10, -80, 0',
        'N3': '-150, -5, -90, 5',
        'N3.4': '-170, -5, -120, 5',
        'N4': '160, -5, -150, 5',
        'custom': custom_region
    }
    converted_region = PolygonLike.convert(regions[region])
    if not converted_region:
        raise ValidationError(
            'No region has been provided to ENSO index calculation')

    name = 'ENSO ' + region + ' Index'
    if 'custom' == region:
        name = 'ENSO Index over ' + PolygonLike.format(converted_region)

    return _generic_index_calculation(ds, var, converted_region, 5, file, name,
                                      threshold, monitor)
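
A sketch of an enso call over the Nino3.4 region, assuming ds is a monthly SST dataset; the variable name 'sst', the file 'sst_climatology.nc' and the threshold are placeholders:

enso_index = enso(ds,
                  var='sst',
                  file='sst_climatology.nc',
                  region='N3.4',
                  threshold=0.5)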
Example #24
def _generic_index_calculation(ds: xr.Dataset,
                               var: VarName.TYPE,
                               region: PolygonLike.TYPE,
                               window: int,
                               file: str,
                               name: str,
                               threshold: float = None,
                               monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    A generic index calculation, where an index is defined as an anomaly,
    relative to the given reference, of a moving average with the given window
    size of the given region of the given variable of the given dataset.

    :param ds: Dataset from which to calculate the index
    :param var: Variable from which to calculate index
    :param region: Spatial subset from which to calculate the index
    :param window: Window size for the moving average
    :param file: Path to the reference file
    :param threshold: Absolute threshold that indicates an ENSO event
    :param name: Name of the index
    :param monitor: a progress monitor.
    :return: A dataset that contains the index timeseries
    """
    var = VarName.convert(var)
    region = PolygonLike.convert(region)

    with monitor.starting("Calculate the index", total_work=2):
        ds = select_var(ds, var)
        ds_subset = subset_spatial(ds, region)
        anom = anomaly_external(ds_subset, file, monitor=monitor.child(1))
        with monitor.child(1).observing("Calculate mean"):
            ts = anom.mean(dim=['lat', 'lon'])
        df = pd.DataFrame(data=ts[var].values, columns=[name], index=ts.time)
        retval = df.rolling(window=window, center=True).mean().dropna()

    if threshold is None:
        return retval

    retval['El Nino'] = pd.Series((retval[name] > threshold),
                                  index=retval.index)
    retval['La Nina'] = pd.Series((retval[name] < -threshold),
                                  index=retval.index)
    return retval
Example #25
    def test_convert(self):
        self.assertEqual(PolygonLike.convert(None), None)
        self.assertEqual(PolygonLike.convert(''), None)
        coords = [(10.4, 20.2), (30.8, 20.2), (30.8, 40.8), (10.4, 40.8)]
        self.assertTrue(PolygonLike.convert(coords).equals(Polygon(coords)))
        self.assertTrue(PolygonLike.convert([10.4, 20.2, 30.8, 40.8])
                        .equals(Polygon(coords)))

        with self.assertRaises(ValidationError) as err:
            PolygonLike.convert('aaa')
        self.assertEqual(
            str(err.exception),
            "Value cannot be converted into a 'PolygonLike': "
            "Invalid geometry WKT format.")
Example #26
    def __init__(self,
                 name: str,
                 files: Union[Sequence[str], OrderedDict],
                 data_store: 'LocalDataStore',
                 temporal_coverage: TimeRangeLike.TYPE = None,
                 spatial_coverage: PolygonLike.TYPE = None,
                 variables: VarNamesLike.TYPE = None,
                 reference_type: str = None,
                 reference_name: str = None):
        self._name = name
        if isinstance(files, Sequence):
            self._files = OrderedDict.fromkeys(files)
        else:
            self._files = files
        self._data_store = data_store

        initial_temporal_coverage = TimeRangeLike.convert(
            temporal_coverage) if temporal_coverage else None
        if not initial_temporal_coverage:
            files_number = len(self._files.items())
            if files_number > 0:
                files_range = list(self._files.values())
                if files_range:
                    if isinstance(files_range[0], Tuple):
                        initial_temporal_coverage = TimeRangeLike.convert(
                            tuple([
                                files_range[0][0],
                                files_range[files_number - 1][1]
                            ]))
                    elif isinstance(files_range[0], datetime):
                        initial_temporal_coverage = TimeRangeLike.convert(
                            (files_range[0], files_range[files_number - 1]))

        self._temporal_coverage = initial_temporal_coverage
        self._spatial_coverage = PolygonLike.convert(
            spatial_coverage) if spatial_coverage else None
        self._variables = VarNamesLike.convert(
            variables) if variables else None

        self._reference_type = reference_type if reference_type else None
        self._reference_name = reference_name
Example #27
def enso(ds: xr.Dataset,
         var: VarName.TYPE,
         file: str,
         region: str = 'n34',
         custom_region: PolygonLike.TYPE = None,
         threshold: float = None,
         monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    Calculate ENSO index, which is defined as a five month running mean of
    anomalies of monthly means of SST data in the given region.

    :param ds: A monthly SST dataset
    :param file: Path to the reference data file, e.g. a climatology. A suitable
    reference dataset can be generated using the long_term_average operation
    :param var: Dataset variable to use for index calculation
    :param region: Region for index calculation, the default is Nino3.4
    :param custom_region: If 'custom' is chosen as the 'region', this parameter
    has to be provided to set the desired region.
    :param threshold: If given, a boolean El Nino/La Nina timeseries will be
    calculated and added to the output dataset, according to the given
    threshold. An anomaly larger than the positive value of the threshold
    indicates El Nino and an anomaly smaller than the negative of the given
    threshold indicates La Nina.
    :param monitor: a progress monitor.
    :return: A dataset that contains the index timeseries.
    """
    regions = {'N1+2': '-90, -10, -80, 0',
               'N3': '-150, -5, -90, 5',
               'N3.4': '-170, -5, -120, 5',
               'N4': '160, -5, -150, 5',
               'custom': custom_region}
    converted_region = PolygonLike.convert(regions[region])
    if not converted_region:
        raise ValidationError('No region has been provided to ENSO index calculation')

    name = 'ENSO ' + region + ' Index'
    if 'custom' == region:
        name = 'ENSO Index over ' + PolygonLike.format(converted_region)

    return _generic_index_calculation(ds, var, converted_region, 5, file, name, threshold, monitor)
Example #28
    def open_dataset(self,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     var_names: VarNamesLike.TYPE = None,
                     protocol: str = None) -> Any:
        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        selected_file_list = self._find_files(time_range)
        if not selected_file_list:
            msg = 'Data source \'{}\' does not seem to have any data files'.format(
                self.name)
            if time_range is not None:
                msg += ' in given time range {}'.format(
                    TimeRangeLike.format(time_range))
            raise IOError(msg)

        files = self._get_urls_list(selected_file_list, _ODP_PROTOCOL_OPENDAP)
        try:
            ds = open_xarray_dataset(files)
            if region:
                [lon_min, lat_min, lon_max, lat_max] = region.bounds
                ds = ds.sel(drop=False,
                            lat=slice(lat_min, lat_max),
                            lon=slice(lon_min, lon_max))
            if var_names:
                ds = ds.drop([
                    var_name for var_name in ds.variables.keys()
                    if var_name not in var_names
                ])
            return ds

        except OSError as e:
            raise IOError("Files: {} caused:\nOSError({}): {}".format(
                files, e.errno, e.strerror))
Example #29
    def test_make_local_and_update(self):

        soilmoisture_data_sources = self.data_store.query(
            query_expr='esacci.SOILMOISTURE.day.L3S.SSMV.multi-sensor.multi-platform.COMBINED.02-1.r1')
        soilmoisture_data_source = soilmoisture_data_sources[0]

        reference_path = os.path.join(os.path.dirname(__file__),
                                      os.path.normpath('resources/datasources/local/files/'))

        def find_files_mock(_, time_range):

            def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size: int):

                return [item_name, date_from, date_to, size,
                        {'OPENDAP': os.path.join(reference_path, item_name),
                         'HTTPServer': 'file:' + urllib.request.pathname2url(os.path.join(reference_path, item_name))}]

            reference_files = {
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781114000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 14, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 14, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781115000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 15, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 15, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781116000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 16, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 16, 23, 59),
                    'size': 21511378
                }
            }

            reference_files_list = []

            for reference_file in reference_files.items():
                file_name = reference_file[0]
                file_date_from = reference_file[1].get('date_from')
                file_date_to = reference_file[1].get('date_to')
                file_size = reference_file[1].get('size')
                if time_range:
                    if file_date_from >= time_range[0] and file_date_to <= time_range[1]:
                        reference_files_list.append(build_file_item(file_name,
                                                                    file_date_from,
                                                                    file_date_to,
                                                                    file_size))
                else:
                    reference_files_list.append(build_file_item(file_name,
                                                                file_date_from,
                                                                file_date_to,
                                                                file_size))
            return reference_files_list

        with unittest.mock.patch('cate.ds.esa_cci_odp.EsaCciOdpDataSource._find_files', find_files_mock):
            with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]):

                new_ds_title = 'local_ds_test'
                new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                           datetime.datetime(1978, 11, 16, 23, 59)))
                try:
                    new_ds = soilmoisture_data_source.make_local(new_ds_title, time_range=new_ds_time_range)
                except Exception:
                    raise ValueError(reference_path, os.listdir(reference_path))
                self.assertIsNotNone(new_ds)

                self.assertEqual(new_ds.id, "local.%s" % new_ds_title)
                self.assertEqual(new_ds.temporal_coverage(), new_ds_time_range)

                new_ds_w_one_variable_title = 'local_ds_test_var'
                new_ds_w_one_variable_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                          datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_one_variable_var_names = VarNamesLike.convert(['sm'])

                new_ds_w_one_variable = soilmoisture_data_source.make_local(
                    new_ds_w_one_variable_title,
                    time_range=new_ds_w_one_variable_time_range,
                    var_names=new_ds_w_one_variable_var_names
                )
                self.assertIsNotNone(new_ds_w_one_variable)

                self.assertEqual(new_ds_w_one_variable.id, "local.%s" % new_ds_w_one_variable_title)
                ds = new_ds_w_one_variable.open_dataset()

                new_ds_w_one_variable_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(ds.variables),
                                    set(new_ds_w_one_variable_var_names))

                new_ds_w_region_title = 'from_local_to_local_region'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    region=new_ds_w_region_spatial_coverage)  # type: LocalDataSource

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)

                new_ds_w_region_title = 'from_local_to_local_region_one_var'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_var_names = VarNamesLike.convert(['sm'])
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    var_names=new_ds_w_region_var_names,
                    region=new_ds_w_region_spatial_coverage)  # type: LocalDataSource

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)
                data_set = new_ds_w_region.open_dataset()
                new_ds_w_region_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names))

                new_ds_w_region_title = 'from_local_to_local_region_two_var_sm_uncertainty'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_var_names = VarNamesLike.convert(['sm', 'sm_uncertainty'])
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    var_names=new_ds_w_region_var_names,
                    region=new_ds_w_region_spatial_coverage)  # type: LocalDataSource

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)
                data_set = new_ds_w_region.open_dataset()
                new_ds_w_region_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names))

                empty_ds_timerange = (datetime.datetime(2017, 12, 1, 0, 0), datetime.datetime(2017, 12, 31, 23, 59))
                with self.assertRaises(DataAccessError) as cm:
                    soilmoisture_data_source.make_local('empty_ds', time_range=empty_ds_timerange)
                self.assertEqual(f'Data source "{soilmoisture_data_source.id}" does not'
                                 f' seem to have any datasets in given'
                                 f' time range {TimeRangeLike.format(empty_ds_timerange)}',
                                 str(cm.exception))

                new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                           datetime.datetime(1978, 11, 14, 23, 59)))

                new_ds = soilmoisture_data_source.make_local("title_test_copy", time_range=new_ds_time_range)
                self.assertIsNotNone(new_ds)
                self.assertEqual(new_ds.meta_info['title'], soilmoisture_data_source.meta_info['title'])

                title = "Title Test!"
                new_ds = soilmoisture_data_source.make_local("title_test_set", title, time_range=new_ds_time_range)
                self.assertIsNotNone(new_ds)
                self.assertEqual(new_ds.meta_info['title'], title)
Example #30
    def make_local(self,
                   local_name: str,
                   local_id: str = None,
                   time_range: TimeRangeLike.TYPE = None,
                   region: PolygonLike.TYPE = None,
                   var_names: VarNamesLike.TYPE = None,
                   monitor: Monitor = Monitor.NONE) -> Optional[DataSource]:

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        ds_id = local_name
        title = local_id

        local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            add_to_data_store_registry()
            local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            raise ValueError('Cannot initialize `local` DataStore')

        uuid = LocalDataStore.generate_uuid(ref_id=self.id,
                                            time_range=time_range,
                                            region=region,
                                            var_names=var_names)

        if not ds_id or len(ds_id) == 0:
            ds_id = "local.{}.{}".format(self.id, uuid)
            existing_ds_list = local_store.query(ds_id=ds_id)
            if len(existing_ds_list) == 1:
                return existing_ds_list[0]
        else:
            existing_ds_list = local_store.query(ds_id='local.%s' % ds_id)
            if len(existing_ds_list) == 1:
                if existing_ds_list[0].meta_info.get('uuid', None) == uuid:
                    return existing_ds_list[0]
                else:
                    raise ValueError(
                        'Datastore {} already contains dataset {}'.format(
                            local_store.id, ds_id))

        local_meta_info = self.meta_info.copy()
        local_meta_info['ref_uuid'] = local_meta_info.get('uuid', None)
        local_meta_info['uuid'] = uuid

        local_ds = local_store.create_data_source(ds_id,
                                                  title=title,
                                                  time_range=time_range,
                                                  region=region,
                                                  var_names=var_names,
                                                  meta_info=local_meta_info,
                                                  lock_file=True)
        if local_ds:
            if not local_ds.is_complete:
                try:
                    self._make_local(local_ds,
                                     time_range,
                                     region,
                                     var_names,
                                     monitor=monitor)
                except Cancellation as c:
                    local_store.remove_data_source(local_ds)
                    raise c
                except Exception as e:
                    if local_ds.is_empty:
                        local_store.remove_data_source(local_ds)
                    raise e

            if local_ds.is_empty:
                local_store.remove_data_source(local_ds)
                return None

            local_store.register_ds(local_ds)
            return local_ds
        else:
            return None
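
A minimal usage sketch for make_local above, not taken from the original sources: it mirrors the calls in the tests shown earlier. The name data_source, the dataset id and the cate.core.types import path are assumptions for illustration.

import datetime

from cate.core.types import PolygonLike, TimeRangeLike, VarNamesLike  # assumed import path

# data_source is assumed to be a remote DataSource, e.g. data_store.query('...')[0]
time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                    datetime.datetime(1978, 11, 16, 23, 59)))
region = PolygonLike.convert("10,20,30,40")   # lon_min,lat_min,lon_max,lat_max
var_names = VarNamesLike.convert(['sm'])

local_ds = data_source.make_local('soilmoisture_subset',
                                  time_range=time_range,
                                  region=region,
                                  var_names=var_names)
if local_ds is not None:
    print(local_ds.id)  # e.g. "local.soilmoisture_subset"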
Example #33
    def _make_local(self,
                    local_ds: LocalDataSource,
                    time_range: TimeRangeLike.TYPE = None,
                    region: PolygonLike.TYPE = None,
                    var_names: VarNamesLike.TYPE = None,
                    monitor: Monitor = Monitor.NONE):

        local_id = local_ds.id
        time_range = TimeRangeLike.convert(time_range)
        region = PolygonLike.convert(region)
        var_names = VarNamesLike.convert(var_names)

        time_range, region, var_names = self._apply_make_local_fixes(
            time_range, region, var_names)

        compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL',
                                             NETCDF_COMPRESSION_LEVEL)
        compression_enabled = compression_level > 0

        do_update_of_verified_time_coverage_start_once = True
        verified_time_coverage_start = None
        verified_time_coverage_end = None

        encoding_update = dict()
        if compression_enabled:
            encoding_update.update({
                'zlib': True,
                'complevel': compression_level
            })

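        # Subsetting (a region or a variable selection) goes through OPENDAP;
        # otherwise complete files are downloaded via plain HTTP.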
        if region or var_names:
            protocol = _ODP_PROTOCOL_OPENDAP
        else:
            protocol = _ODP_PROTOCOL_HTTP

        local_path = os.path.join(local_ds.data_store.data_store_path,
                                  local_id)
        if not os.path.exists(local_path):
            os.makedirs(local_path)

        selected_file_list = self._find_files(time_range)
        if not selected_file_list:
            msg = 'CCI Open Data Portal data source "{}"\ndoes not seem to have any datasets'.format(
                self.id)
            if time_range is not None:
                msg += ' in given time range {}'.format(
                    TimeRangeLike.format(time_range))
            raise DataAccessError(msg)
        try:
            if protocol == _ODP_PROTOCOL_OPENDAP:

                do_update_of_variables_meta_info_once = True
                do_update_of_region_meta_info_once = True

                files = self._get_urls_list(selected_file_list, protocol)
                monitor.start('Sync ' + self.id, total_work=len(files))
                for idx, dataset_uri in enumerate(files):
                    child_monitor = monitor.child(work=1)

                    file_name = os.path.basename(dataset_uri)
                    local_filepath = os.path.join(local_path, file_name)

                    time_coverage_start = selected_file_list[idx][1]
                    time_coverage_end = selected_file_list[idx][2]

                    try:
                        child_monitor.start(label=file_name, total_work=1)

                        remote_dataset = xr.open_dataset(dataset_uri)

                        if var_names:
                            remote_dataset = remote_dataset.drop([
                                var_name for var_name in
                                remote_dataset.data_vars.keys()
                                if var_name not in var_names
                            ])

                        if region:
                            remote_dataset = normalize_impl(remote_dataset)
                            remote_dataset = subset_spatial_impl(
                                remote_dataset, region)
                            geo_lon_min, geo_lat_min, geo_lon_max, geo_lat_max = region.bounds

                            remote_dataset.attrs['geospatial_lat_min'] = geo_lat_min
                            remote_dataset.attrs['geospatial_lat_max'] = geo_lat_max
                            remote_dataset.attrs['geospatial_lon_min'] = geo_lon_min
                            remote_dataset.attrs['geospatial_lon_max'] = geo_lon_max
                            if do_update_of_region_meta_info_once:
                                local_ds.meta_info['bbox_maxx'] = geo_lon_max
                                local_ds.meta_info['bbox_minx'] = geo_lon_min
                                local_ds.meta_info['bbox_maxy'] = geo_lat_max
                                local_ds.meta_info['bbox_miny'] = geo_lat_min
                                do_update_of_region_meta_info_once = False

                        if compression_enabled:
                            for sel_var_name in remote_dataset.variables.keys():
                                remote_dataset.variables.get(sel_var_name).encoding.update(
                                    encoding_update)

                        remote_dataset.to_netcdf(local_filepath)

                        child_monitor.progress(work=1,
                                               msg=str(time_coverage_start))
                    finally:
                        if do_update_of_variables_meta_info_once:
                            variables_info = local_ds.meta_info.get(
                                'variables', [])
                            local_ds.meta_info['variables'] = [
                                var_info for var_info in variables_info
                                if var_info.get('name') in remote_dataset.variables.keys()
                                and var_info.get('name') not in remote_dataset.dims.keys()
                            ]
                            do_update_of_variables_meta_info_once = False

                        local_ds.add_dataset(
                            os.path.join(local_id, file_name),
                            (time_coverage_start, time_coverage_end))

                        if do_update_of_verified_time_coverage_start_once:
                            verified_time_coverage_start = time_coverage_start
                            do_update_of_verified_time_coverage_start_once = False
                        verified_time_coverage_end = time_coverage_end
                    child_monitor.done()
            else:
                outdated_file_list = []
                for file_rec in selected_file_list:
                    filename, _, _, file_size, url = file_rec
                    dataset_file = os.path.join(local_path, filename)
                    # todo (forman, 20160915): must perform better checks on dataset_file if it is...
                    # ... outdated or incomplete or corrupted.
                    # JSON also includes "checksum" and "checksum_type" fields.
                    if not os.path.isfile(dataset_file) or (
                            file_size
                            and os.path.getsize(dataset_file) != file_size):
                        outdated_file_list.append(file_rec)

                if outdated_file_list:
                    with monitor.starting('Sync ' + self.id,
                                          len(outdated_file_list)):
                        bytes_to_download = sum(
                            [file_rec[3] for file_rec in outdated_file_list])
                        dl_stat = _DownloadStatistics(bytes_to_download)

                        file_number = 1

                        for filename, coverage_from, coverage_to, file_size, url in outdated_file_list:
                            dataset_file = os.path.join(local_path, filename)
                            sub_monitor = monitor.child(work=1.0)

                            # noinspection PyUnusedLocal
                            def reporthook(block_number, read_size,
                                           total_file_size):
                                dl_stat.handle_chunk(read_size)
                                sub_monitor.progress(work=read_size,
                                                     msg=str(dl_stat))

                            sub_monitor_msg = "file %d of %d" % (
                                file_number, len(outdated_file_list))
                            with sub_monitor.starting(sub_monitor_msg,
                                                      file_size):
                                urllib.request.urlretrieve(
                                    url[protocol],
                                    filename=dataset_file,
                                    reporthook=reporthook)
                            file_number += 1
                            local_ds.add_dataset(
                                os.path.join(local_id, filename),
                                (coverage_from, coverage_to))

                            if do_update_of_verified_time_coverage_start_once:
                                verified_time_coverage_start = coverage_from
                                do_update_of_verified_time_coverage_start_once = False
                            verified_time_coverage_end = coverage_to
        except OSError as e:
            raise DataAccessError(
                "Copying remote data source failed: {}".format(e),
                source=self) from e
        local_ds.meta_info['temporal_coverage_start'] = TimeLike.format(
            verified_time_coverage_start)
        local_ds.meta_info['temporal_coverage_end'] = TimeLike.format(
            verified_time_coverage_end)
        local_ds.save(True)
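
For reference, a standalone sketch of the compression step used in _make_local above, assuming an already opened xarray Dataset and a compression level of 5 (in the code it comes from the NETCDF_COMPRESSION_LEVEL setting). The same zlib settings can equivalently be passed through the encoding argument of to_netcdf; the file names are illustrative.

import xarray as xr

ds = xr.open_dataset('input.nc')   # hypothetical input file
compression_level = 5              # assumed value of NETCDF_COMPRESSION_LEVEL
encoding = {name: {'zlib': True, 'complevel': compression_level}
            for name in ds.data_vars}
ds.to_netcdf('compressed_copy.nc', encoding=encoding)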
Example #34
    def test_format(self):
        coords = [(0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 0.0)]
        pol = PolygonLike.convert(coords)
        self.assertEqual('POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))',
                         PolygonLike.format(pol))
Example #35
    def test_make_local_and_update(self):

        reference_path = os.path.join(os.path.dirname(__file__),
                                      os.path.normpath('resources/datasources/local/files/'))

        def find_files_mock(_, time_range):

            def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size: int):

                return [item_name, date_from, date_to, size,
                        {'OPENDAP': os.path.join(reference_path, item_name),
                         'HTTPServer': 'file:' + urllib.request.pathname2url(os.path.join(reference_path, item_name))}]

            reference_files = {
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781114000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 14, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 14, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781115000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 15, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 15, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781116000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 16, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 16, 23, 59),
                    'size': 21511378
                }
            }

            reference_files_list = []

            for reference_file in reference_files.items():
                file_name = reference_file[0]
                file_date_from = reference_file[1].get('date_from')
                file_date_to = reference_file[1].get('date_to')
                file_size = reference_file[1].get('size')
                if time_range:
                    if file_date_from >= time_range[0] and file_date_to <= time_range[1]:
                        reference_files_list.append(build_file_item(file_name,
                                                                    file_date_from,
                                                                    file_date_to,
                                                                    file_size))
                else:
                    reference_files_list.append(build_file_item(file_name,
                                                                file_date_from,
                                                                file_date_to,
                                                                file_size))
            return reference_files_list

        with unittest.mock.patch('cate.ds.esa_cci_odp.EsaCciOdpDataSource._find_files', find_files_mock):
            with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]):
                try:
                    new_ds = self.data_source.make_local('local_ds_test', None,
                                                         (datetime.datetime(1978, 11, 14, 0, 0),
                                                          datetime.datetime(1978, 11, 15, 23, 59)))
                except Exception:
                    raise ValueError(reference_path, os.listdir(reference_path))

                self.assertEqual(new_ds.name, 'local.local_ds_test')
                self.assertEqual(new_ds.temporal_coverage(),
                                 (datetime.datetime(1978, 11, 14, 0, 0),
                                  datetime.datetime(1978, 11, 15, 23, 59)))

                self.data_source.update_local(new_ds.name, (datetime.datetime(1978, 11, 15, 00, 00),
                                                            datetime.datetime(1978, 11, 16, 23, 59)))
                self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert(
                                 (datetime.datetime(1978, 11, 15, 0, 0),
                                  datetime.datetime(1978, 11, 16, 23, 59))))

                self.data_source.update_local(new_ds.name, (datetime.datetime(1978, 11, 14, 00, 00),
                                                            datetime.datetime(1978, 11, 15, 23, 59)))
                self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert(
                                 (datetime.datetime(1978, 11, 14, 0, 0),
                                  datetime.datetime(1978, 11, 15, 23, 59))))

                with self.assertRaises(ValueError) as context:
                    self.data_source.update_local("wrong_ds_name", (datetime.datetime(1978, 11, 15, 00, 00),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                self.assertIn("Couldn't find local DataSource", context.exception.args[0])

                new_ds_w_one_variable = self.data_source.make_local(
                    'local_ds_test_2', None, (datetime.datetime(1978, 11, 14, 0, 0),
                                              datetime.datetime(1978, 11, 15, 23, 59)), None, ['sm'])
                self.assertEqual(new_ds_w_one_variable.name, 'local.local_ds_test_2')
                ds = new_ds_w_one_variable.open_dataset()
                self.assertSetEqual(set(ds.variables), {'sm', 'lat', 'lon', 'time'})

                new_ds_w_region = self.data_source.make_local(
                    'from_local_to_local_region', None, (datetime.datetime(1978, 11, 14, 0, 0),
                                                         datetime.datetime(1978, 11, 15, 23, 59)),
                    "10,10,20,20", ['sm'])  # type: LocalDataSource
                self.assertEqual(new_ds_w_region.name, 'local.from_local_to_local_region')
                self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20"))
                data_set = new_ds_w_region.open_dataset()
                self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'})
Example #36
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             indexers: DictLike.TYPE = None,
             time: TimeLike.TYPE = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             title: str = None,
             properties: DictLike.TYPE = None,
             file: str = None) -> Figure:
    """
    Create a geographic map plot for the variable given by dataset *ds* and variable name *var*.

    Plots the given variable from the given dataset on a map with coastal lines.
    In case no variable name is given, the first encountered variable in the
    dataset is plotted. In case no *time* is given, the first time slice
    is taken. It is also possible to set extents of the plot. If no extents
    are given, a global plot is created.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into the data array of *var*. *indexers* is a dictionary
           or a comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels, e.g. "layer=4".
    :param time: time slice index to plot, can be a string "YYYY-MM-DD" or an integer number
    :param region: Region to plot
    :param projection: name of a global projection, see http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5)"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    if not isinstance(ds, xr.Dataset):
        raise NotImplementedError(
            'Only gridded datasets are currently supported')

    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)
    var = ds[var_name]

    time = TimeLike.convert(time)
    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    extents = None
    region = PolygonLike.convert(region)
    if region:
        lon_min, lat_min, lon_max, lat_max = region.bounds
        if not _check_bounding_box(lat_min, lat_max, lon_min, lon_max):
            raise ValueError(
                'Provided plot extents do not form a valid bounding box '
                'within [-180.0,+180.0,-90.0,+90.0]')
        extents = [lon_min, lon_max, lat_min, lat_max]

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection == 'PlateCarree':
        proj = ccrs.PlateCarree(central_longitude=central_lon)
    elif projection == 'LambertCylindrical':
        proj = ccrs.LambertCylindrical(central_longitude=central_lon)
    elif projection == 'Mercator':
        proj = ccrs.Mercator(central_longitude=central_lon)
    elif projection == 'Miller':
        proj = ccrs.Miller(central_longitude=central_lon)
    elif projection == 'Mollweide':
        proj = ccrs.Mollweide(central_longitude=central_lon)
    elif projection == 'Orthographic':
        proj = ccrs.Orthographic(central_longitude=central_lon)
    elif projection == 'Robinson':
        proj = ccrs.Robinson(central_longitude=central_lon)
    elif projection == 'Sinusoidal':
        proj = ccrs.Sinusoidal(central_longitude=central_lon)
    elif projection == 'NorthPolarStereo':
        proj = ccrs.NorthPolarStereo(central_longitude=central_lon)
    elif projection == 'SouthPolarStereo':
        proj = ccrs.SouthPolarStereo(central_longitude=central_lon)
    else:
        raise ValueError('illegal projection: "%s"' % projection)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents)
    else:
        ax.set_global()

    ax.coastlines()
    var_data = _get_var_data(var,
                             indexers,
                             time=time,
                             remaining_dims=('lon', 'lat'))
    var_data.plot.contourf(ax=ax, transform=proj, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
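
A hedged usage sketch for plot_map; the input file, the variable name 'sm' and the output file name are illustrative assumptions, while the parameter values follow the docstring above.

import xarray as xr

ds = xr.open_dataset('soilmoisture_day.nc')   # hypothetical input file
fig = plot_map(ds,
               var='sm',
               time='1978-11-14',
               region='10,20,30,40',           # lon_min,lat_min,lon_max,lat_max
               projection='Robinson',
               title='Soil moisture',
               file='soilmoisture_map.png')    # also saves the figure as a PNG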
Example #37
    def test_make_local_and_update(self):

        soilmoisture_data_sources = self.data_store.query(
            query_expr='esacci.SOILMOISTURE.day.L3S.SSMV.multi-sensor.multi-platform.COMBINED.02-1.r1')
        soilmoisture_data_source = soilmoisture_data_sources[0]

        reference_path = os.path.join(os.path.dirname(__file__),
                                      os.path.normpath('resources/datasources/local/files/'))

        def find_files_mock(_, time_range):

            def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size: int):

                return [item_name, date_from, date_to, size,
                        {'OPENDAP': os.path.join(reference_path, item_name),
                         'HTTPServer': 'file:' + urllib.request.pathname2url(os.path.join(reference_path, item_name))}]

            reference_files = {
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781114000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 14, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 14, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781115000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 15, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 15, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781116000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 16, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 16, 23, 59),
                    'size': 21511378
                }
            }

            reference_files_list = []

            for reference_file in reference_files.items():
                file_name = reference_file[0]
                file_date_from = reference_file[1].get('date_from')
                file_date_to = reference_file[1].get('date_to')
                file_size = reference_file[1].get('size')
                if time_range:
                    if file_date_from >= time_range[0] and file_date_to <= time_range[1]:
                        reference_files_list.append(build_file_item(file_name,
                                                                    file_date_from,
                                                                    file_date_to,
                                                                    file_size))
                else:
                    reference_files_list.append(build_file_item(file_name,
                                                                file_date_from,
                                                                file_date_to,
                                                                file_size))
            return reference_files_list

        with unittest.mock.patch('cate.ds.esa_cci_odp.EsaCciOdpDataSource._find_files', find_files_mock):
            with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]):

                new_ds_title = 'local_ds_test'
                new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                           datetime.datetime(1978, 11, 16, 23, 59)))
                try:
                    new_ds = soilmoisture_data_source.make_local(new_ds_title, time_range=new_ds_time_range)
                except Exception:
                    raise ValueError(reference_path, os.listdir(reference_path))
                self.assertIsNotNone(new_ds)

                self.assertEqual(new_ds.id, "local.%s" % new_ds_title)
                self.assertEqual(new_ds.temporal_coverage(), new_ds_time_range)

                new_ds_w_one_variable_title = 'local_ds_test_var'
                new_ds_w_one_variable_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                          datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_one_variable_var_names = VarNamesLike.convert(['sm'])

                new_ds_w_one_variable = soilmoisture_data_source.make_local(
                    new_ds_w_one_variable_title,
                    time_range=new_ds_w_one_variable_time_range,
                    var_names=new_ds_w_one_variable_var_names
                )
                self.assertIsNotNone(new_ds_w_one_variable)

                self.assertEqual(new_ds_w_one_variable.id, "local.%s" % new_ds_w_one_variable_title)
                ds = new_ds_w_one_variable.open_dataset()

                new_ds_w_one_variable_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(ds.variables),
                                    set(new_ds_w_one_variable_var_names))

                new_ds_w_region_title = 'from_local_to_local_region'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    region=new_ds_w_region_spatial_coverage)

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)

                new_ds_w_region_title = 'from_local_to_local_region_one_var'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_var_names = VarNamesLike.convert(['sm'])
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    var_names=new_ds_w_region_var_names,
                    region=new_ds_w_region_spatial_coverage)

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)
                data_set = new_ds_w_region.open_dataset()
                new_ds_w_region_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names))

                new_ds_w_region_title = 'from_local_to_local_region_two_var_sm_uncertainty'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_var_names = VarNamesLike.convert(['sm', 'sm_uncertainty'])
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    var_names=new_ds_w_region_var_names,
                    region=new_ds_w_region_spatial_coverage)

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)
                data_set = new_ds_w_region.open_dataset()
                new_ds_w_region_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names))

                empty_ds_timerange = (datetime.datetime(2017, 12, 1, 0, 0), datetime.datetime(2017, 12, 31, 23, 59))
                with self.assertRaises(DataAccessError) as cm:
                    soilmoisture_data_source.make_local('empty_ds', time_range=empty_ds_timerange)
                self.assertEqual(f'Data source "{soilmoisture_data_source.id}" does not'
                                 f' seem to have any datasets in given'
                                 f' time range {TimeRangeLike.format(empty_ds_timerange)}',
                                 str(cm.exception))

                new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                           datetime.datetime(1978, 11, 14, 23, 59)))

                new_ds = soilmoisture_data_source.make_local("title_test_copy", time_range=new_ds_time_range)
                self.assertIsNotNone(new_ds)
                self.assertEqual(new_ds.meta_info['title'], soilmoisture_data_source.meta_info['title'])

                title = "Title Test!"
                new_ds = soilmoisture_data_source.make_local("title_test_set", title, time_range=new_ds_time_range)
                self.assertIsNotNone(new_ds)
                self.assertEqual(new_ds.meta_info['title'], title)
Example #38
    def _make_local(self,
                    local_ds: 'LocalDataSource',
                    time_range: TimeRangeLike.TYPE = None,
                    region: PolygonLike.TYPE = None,
                    var_names: VarNamesLike.TYPE = None,
                    monitor: Monitor = Monitor.NONE):

        # local_name = local_ds.name
        local_id = local_ds.name

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(
            var_names) if var_names else None  # type: Sequence

        compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL',
                                             NETCDF_COMPRESSION_LEVEL)
        compression_enabled = compression_level > 0

        encoding_update = dict()
        if compression_enabled:
            encoding_update.update({
                'zlib': True,
                'complevel': compression_level
            })

        local_path = os.path.join(local_ds.data_store.data_store_path,
                                  local_id)
        data_store_path = local_ds.data_store.data_store_path
        if not os.path.exists(local_path):
            os.makedirs(local_path)

        monitor.start("Sync " + self.name, total_work=len(self._files.items()))
        for remote_relative_filepath, coverage in self._files.items():
            child_monitor = monitor.child(work=1)

            file_name = os.path.basename(remote_relative_filepath)
            local_relative_filepath = os.path.join(local_id, file_name)
            local_absolute_filepath = os.path.join(data_store_path,
                                                   local_relative_filepath)

            remote_absolute_filepath = os.path.join(
                self._data_store.data_store_path, remote_relative_filepath)

            if isinstance(coverage, Tuple):

                time_coverage_start = coverage[0]
                time_coverage_end = coverage[1]

                remote_netcdf = None
                local_netcdf = None
                if not time_range or (time_coverage_start >= time_range[0]
                                      and time_coverage_end <= time_range[1]):
                    if region or var_names:
                        try:
                            remote_netcdf = NetCDF4DataStore(
                                remote_absolute_filepath)

                            local_netcdf = NetCDF4DataStore(
                                local_absolute_filepath,
                                mode='w',
                                persist=True)
                            local_netcdf.set_attributes(
                                remote_netcdf.get_attrs())

                            remote_dataset = xr.Dataset.load_store(
                                remote_netcdf)

                            process_region = False
                            if region:
                                geo_lat_min = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs, 'geospatial_lat_min')
                                geo_lat_max = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs, 'geospatial_lat_max')
                                geo_lon_min = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs, 'geospatial_lon_min')
                                geo_lon_max = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs, 'geospatial_lon_max')

                                geo_lat_res = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs,
                                    'geospatial_lat_resolution')
                                geo_lon_res = self._get_harmonized_coordinate_value(
                                    remote_dataset.attrs,
                                    'geospatial_lon_resolution')
                                if not (isnan(geo_lat_min)
                                        or isnan(geo_lat_max)
                                        or isnan(geo_lon_min)
                                        or isnan(geo_lon_max)
                                        or isnan(geo_lat_res)
                                        or isnan(geo_lon_res)):
                                    process_region = True

                                    [lon_min, lat_min, lon_max, lat_max] = region.bounds

                                    lat_min = floor(
                                        (lat_min - geo_lat_min) / geo_lat_res)
                                    lat_max = ceil(
                                        (lat_max - geo_lat_min) / geo_lat_res)
                                    lon_min = floor(
                                        (lon_min - geo_lon_min) / geo_lon_res)
                                    lon_max = ceil(
                                        (lon_max - geo_lon_min) / geo_lon_res)

                                    # TODO (kbernat): check why dataset.sel fails!
                                    remote_dataset = remote_dataset.isel(
                                        drop=False,
                                        lat=slice(lat_min, lat_max),
                                        lon=slice(lon_min, lon_max))

                                    geo_lat_max = lat_max * geo_lat_res + geo_lat_min
                                    geo_lat_min += lat_min * geo_lat_res
                                    geo_lon_max = lon_max * geo_lon_res + geo_lon_min
                                    geo_lon_min += lon_min * geo_lon_res

                            if not var_names:
                                var_names = [
                                    var_name for var_name in
                                    remote_netcdf.variables.keys()
                                ]
                            var_names.extend([
                                coord_name
                                for coord_name in remote_dataset.coords.keys()
                                if coord_name not in var_names
                            ])
                            child_monitor.start(label=file_name,
                                                total_work=len(var_names))
                            for sel_var_name in var_names:
                                var_dataset = remote_dataset.drop([
                                    var_name for var_name in
                                    remote_dataset.variables.keys()
                                    if var_name != sel_var_name
                                ])
                                if compression_enabled:
                                    var_dataset.variables.get(
                                        sel_var_name).encoding.update(
                                            encoding_update)
                                local_netcdf.store_dataset(var_dataset)
                                child_monitor.progress(work=1,
                                                       msg=sel_var_name)
                            if process_region:
                                local_netcdf.set_attribute(
                                    'geospatial_lat_min', geo_lat_min)
                                local_netcdf.set_attribute(
                                    'geospatial_lat_max', geo_lat_max)
                                local_netcdf.set_attribute(
                                    'geospatial_lon_min', geo_lon_min)
                                local_netcdf.set_attribute(
                                    'geospatial_lon_max', geo_lon_max)
                        finally:
                            if remote_netcdf:
                                remote_netcdf.close()
                            if local_netcdf:
                                local_netcdf.close()
                                local_ds.add_dataset(
                                    local_relative_filepath,
                                    (time_coverage_start, time_coverage_end))
                        child_monitor.done()
                    else:
                        shutil.copy(remote_absolute_filepath,
                                    local_absolute_filepath)
                        local_ds.add_dataset(
                            local_relative_filepath,
                            (time_coverage_start, time_coverage_end))
                        child_monitor.done()
        monitor.done()
        return local_id
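
A short worked example of the floor/ceil index arithmetic used above; the grid origin and 0.25-degree resolution are illustrative assumptions.

from math import ceil, floor

geo_lat_min, geo_lat_res = -90.0, 0.25   # assumed grid origin and resolution
lat_min, lat_max = 10.0, 20.0            # requested latitude range in degrees

lat_min_idx = floor((lat_min - geo_lat_min) / geo_lat_res)   # -> 400
lat_max_idx = ceil((lat_max - geo_lat_min) / geo_lat_res)    # -> 440
# remote_dataset.isel(lat=slice(400, 440)) would then select only the requested rows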
Example #39
    def _make_local(self,
                    local_ds: 'LocalDataSource',
                    time_range: TimeRangeLike.TYPE = None,
                    region: PolygonLike.TYPE = None,
                    var_names: VarNamesLike.TYPE = None,
                    monitor: Monitor = Monitor.NONE):

        local_id = local_ds.id

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(
            var_names) if var_names else None  # type: Sequence

        compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL',
                                             NETCDF_COMPRESSION_LEVEL)
        compression_enabled = compression_level > 0

        encoding_update = dict()
        if compression_enabled:
            encoding_update.update({
                'zlib': True,
                'complevel': compression_level
            })

        local_path = os.path.join(local_ds.data_store.data_store_path,
                                  local_id)
        data_store_path = local_ds.data_store.data_store_path
        if not os.path.exists(local_path):
            os.makedirs(local_path)

        monitor.start("Sync " + self.id, total_work=len(self._files.items()))
        for remote_relative_filepath, coverage in self._files.items():
            child_monitor = monitor.child(work=1)

            file_name = os.path.basename(remote_relative_filepath)
            local_relative_filepath = os.path.join(local_id, file_name)
            local_absolute_filepath = os.path.join(data_store_path,
                                                   local_relative_filepath)

            remote_absolute_filepath = os.path.join(
                self._data_store.data_store_path, remote_relative_filepath)

            if isinstance(coverage, Tuple):

                time_coverage_start = coverage[0]
                time_coverage_end = coverage[1]

                if not time_range or (time_coverage_start >= time_range[0]
                                      and time_coverage_end <= time_range[1]):
                    if region or var_names:

                        do_update_of_variables_meta_info_once = True
                        do_update_of_region_meta_info_once = True

                        try:
                            remote_dataset = xr.open_dataset(
                                remote_absolute_filepath)

                            if var_names:
                                remote_dataset = remote_dataset.drop([
                                    var_name for var_name in
                                    remote_dataset.data_vars.keys()
                                    if var_name not in var_names
                                ])

                            if region:
                                remote_dataset = normalize_impl(remote_dataset)
                                remote_dataset = subset_spatial_impl(
                                    remote_dataset, region)
                                geo_lon_min, geo_lat_min, geo_lon_max, geo_lat_max = region.bounds

                                remote_dataset.attrs['geospatial_lat_min'] = geo_lat_min
                                remote_dataset.attrs['geospatial_lat_max'] = geo_lat_max
                                remote_dataset.attrs['geospatial_lon_min'] = geo_lon_min
                                remote_dataset.attrs['geospatial_lon_max'] = geo_lon_max
                                if do_update_of_region_meta_info_once:
                                    local_ds.meta_info['bbox_maxx'] = geo_lon_max
                                    local_ds.meta_info['bbox_minx'] = geo_lon_min
                                    local_ds.meta_info['bbox_maxy'] = geo_lat_max
                                    local_ds.meta_info['bbox_miny'] = geo_lat_min
                                    do_update_of_region_meta_info_once = False

                            if compression_enabled:
                                for sel_var_name in remote_dataset.variables.keys():
                                    remote_dataset.variables.get(sel_var_name).encoding.update(
                                        encoding_update)

                            remote_dataset.to_netcdf(local_absolute_filepath)

                            child_monitor.progress(
                                work=1, msg=str(time_coverage_start))
                        finally:
                            if do_update_of_variables_meta_info_once:
                                variables_info = local_ds.meta_info.get(
                                    'variables', [])
                                local_ds.meta_info['variables'] = [
                                    var_info for var_info in variables_info
                                    if var_info.get('name') in remote_dataset.variables.keys()
                                    and var_info.get('name') not in remote_dataset.dims.keys()
                                ]
                                do_update_of_variables_meta_info_once = False

                            local_ds.add_dataset(
                                os.path.join(local_id, file_name),
                                (time_coverage_start, time_coverage_end))

                        child_monitor.done()
                    else:
                        shutil.copy(remote_absolute_filepath,
                                    local_absolute_filepath)
                        local_ds.add_dataset(
                            local_relative_filepath,
                            (time_coverage_start, time_coverage_end))
                        child_monitor.done()
        monitor.done()
        return local_id
Example #40
    def test_format(self):
        self.assertEqual(PolygonLike.format(None), '')
        coords = [(10.4, 20.2), (30.8, 20.2), (30.8, 40.8), (10.4, 40.8)]
        pol = PolygonLike.convert(coords)
        self.assertEqual(PolygonLike.format(pol),
                         'POLYGON ((10.4 20.2, 30.8 20.2, 30.8 40.8, 10.4 40.8, 10.4 20.2))')
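
To close, a small sketch of the PolygonLike conversions exercised throughout these examples; the cate.core.types import path is an assumption. A coordinate list and a "lon_min,lat_min,lon_max,lat_max" string both convert to a shapely Polygon, and format() renders WKT.

from cate.core.types import PolygonLike   # assumed import path

poly_from_coords = PolygonLike.convert([(10.4, 20.2), (30.8, 20.2), (30.8, 40.8), (10.4, 40.8)])
poly_from_bbox = PolygonLike.convert("10.4,20.2,30.8,40.8")   # lon_min,lat_min,lon_max,lat_max
print(PolygonLike.format(poly_from_coords))
# POLYGON ((10.4 20.2, 30.8 20.2, 30.8 40.8, 10.4 40.8, 10.4 20.2))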