Пример #1
0
    def test_open_xarray_dataset(self):
        """Check monitor records and chunk sizes for the 'large' and 'small' test data."""
        large_monitor = RecordingMonitor()
        small_monitor = RecordingMonitor()
        large_ds = open_xarray_dataset(
            os.path.join(_TEST_DATA_PATH, 'large', '*.nc'), monitor=large_monitor)
        small_ds = open_xarray_dataset(
            os.path.join(_TEST_DATA_PATH, 'small', '*.nc'), monitor=small_monitor)

        # Both monitors are expected to record the same start/progress/done sequence.
        expected_records = [
            ('start', 'Opening dataset', 1),
            ('progress', 1, None, 100),
            ('done', ),
        ]
        self.assertEqual(large_monitor.records, expected_records)
        self.assertEqual(small_monitor.records, expected_records)

        # Each dimension is expected to consist of exactly one chunk.
        expected_small_chunks = {'lon': (1440, ), 'lat': (720, ), 'time': (1, )}
        expected_large_chunks = {'lon': (7200, ), 'lat': (3600, ), 'time': (1, ), 'bnds': (2, )}
        self.assertEqual(small_ds.chunks, expected_small_chunks)
        self.assertEqual(large_ds.chunks, expected_large_chunks)
Пример #2
0
    def test_autochunking(self):
        """Check the chunk sizes of datasets opened with only a path argument."""
        large_dataset = ds.open_xarray_dataset(op.join(_TEST_DATA_PATH, 'large', '*.nc'))
        small_dataset = ds.open_xarray_dataset(op.join(_TEST_DATA_PATH, 'small', '*.nc'))

        # The small dataset is expected to have one chunk per dimension.
        self.assertEqual(small_dataset.chunks,
                         dict(lon=(1440,), lat=(720,), time=(1,)))
        # The large dataset is expected to be split in two along both lat and lon.
        self.assertEqual(large_dataset.chunks,
                         dict(lon=(3600, 3600), lat=(1800, 1800), time=(1,), bnds=(2,)))
Пример #3
0
    def test_open_xarray(self):
        """Call open_xarray_dataset() with path lists that contain unusable entries."""
        bad_url = 'httpz://www.acme.com'

        # First case: a glob with a wrong file extension plus a malformed URL.
        unusable = [os.path.join(_TEST_DATA_PATH, 'small', '*.nck'), bad_url]
        try:
            open_xarray_dataset(unusable)
        except IOError as error:
            self.assertEqual(str(error), 'File {} not found'.format(unusable))
        # NOTE(review): if no IOError is raised at all, the case above passes silently.

        # Second case: a valid glob plus the malformed URL must still give a dataset.
        partly_usable = [os.path.join(_TEST_DATA_PATH, 'small', '*.nc'), bad_url]
        opened = open_xarray_dataset(partly_usable)
        self.assertIsNotNone(opened)
Пример #4
0
    def test_open_xarray(self):
        """Call open_xarray_dataset() with path lists that contain unusable entries."""
        small_dir = os.path.join(_TEST_DATA_PATH, 'small')
        malformed_url = 'httpz://www.acme.com'

        # A glob with a wrong extension combined with a malformed URL.
        nothing_usable = [os.path.join(small_dir, '*.nck'), malformed_url]
        try:
            open_xarray_dataset(nothing_usable)
        except IOError as io_error:
            self.assertEqual(str(io_error), 'File {} not found'.format(nothing_usable))
        # NOTE(review): when no IOError is raised, nothing is asserted for this case.

        # A valid glob combined with the malformed URL must still produce a dataset.
        one_usable = [os.path.join(small_dir, '*.nc'), malformed_url]
        self.assertIsNotNone(open_xarray_dataset(one_usable))
Пример #5
0
    def test_open_xarray_dataset(self):
        """Open both test datasets with recording monitors; verify records and chunks."""
        large_glob = os.path.join(_TEST_DATA_PATH, 'large', '*.nc')
        small_glob = os.path.join(_TEST_DATA_PATH, 'small', '*.nc')

        monitor_for_large = RecordingMonitor()
        monitor_for_small = RecordingMonitor()
        dataset_large = open_xarray_dataset(large_glob, monitor=monitor_for_large)
        dataset_small = open_xarray_dataset(small_glob, monitor=monitor_for_small)

        # Monitors: the same three records are expected for either dataset.
        expected = [
            ('start', 'Opening dataset', 1),
            ('progress', 1, None, 100),
            ('done',),
        ]
        self.assertEqual(monitor_for_large.records, expected)
        self.assertEqual(monitor_for_small.records, expected)

        # Chunking: a single chunk per dimension is expected.
        self.assertEqual(dataset_small.chunks,
                         {'lon': (1440,), 'lat': (720,), 'time': (1,)})
        self.assertEqual(dataset_large.chunks,
                         {'lon': (7200,), 'lat': (3600,), 'time': (1,), 'bnds': (2,)})
Пример #6
0
    def open_dataset(self,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     var_names: VarNamesLike.TYPE = None,
                     protocol: str = None) -> Any:
        """
        Open the remote files of this data source as one dataset.

        :param time_range: optional time range used to select the files to open
        :param region: optional region; if given, the dataset is normalized and
               spatially subsetted to it
        :param var_names: optional names of the data variables to keep; all other
               data variables are dropped
        :param protocol: not used by this implementation
        :return: the dataset returned by ``open_xarray_dataset``, after the optional
                 spatial and variable subsetting
        :raise DataAccessError: if no files are found for the request, or if opening
               the files fails with an ``OSError``
        """
        time_range = TimeRangeLike.convert(time_range) if time_range else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        selected_file_list = self._find_files(time_range)
        if not selected_file_list:
            msg = 'CCI Open Data Portal data source "{}"\ndoes not seem to have any datasets'.format(self.id)
            if time_range is not None:
                msg += ' in given time range {}'.format(TimeRangeLike.format(time_range))
            raise DataAccessError(msg)

        files = self._get_urls_list(selected_file_list, _ODP_PROTOCOL_OPENDAP)
        try:
            ds = open_xarray_dataset(files)
            if region:
                ds = normalize_impl(ds)
                ds = subset_spatial_impl(ds, region)
            if var_names:
                ds = ds.drop([var_name for var_name in ds.data_vars.keys() if var_name not in var_names])
            return ds

        except OSError as e:
            if time_range:
                raise DataAccessError("Cannot open remote dataset for time range {}:\n"
                                      "{}"
                                      .format(TimeRangeLike.format(time_range), e), source=self) from e
            else:
                # Only the error itself is formatted here: the message has a single
                # placeholder and there is no time range to report in this branch.
                raise DataAccessError("Cannot open remote dataset:\n"
                                      "{}"
                                      .format(e), source=self) from e
Пример #7
0
 def open_dataset(self,
                  time_range: TimeRangeLike.TYPE = None,
                  region: PolygonLike.TYPE = None,
                  var_names: VarNamesLike.TYPE = None,
                  protocol: str = None) -> Any:
     """
     Open the local files of this data source as one dataset.

     :param time_range: optional time range; only files whose registered time
            coverage lies within it are opened
     :param region: optional region; if given, the dataset is normalized and
            spatially subsetted to it
     :param var_names: optional names of the data variables to keep
     :param protocol: not used by this implementation
     :return: the opened (and optionally subsetted) dataset
     :raise DataAccessError: if no file matches, or if opening fails with ``OSError``
     """
     time_range = TimeRangeLike.convert(time_range) if time_range else None
     region = PolygonLike.convert(region) if region else region
     var_names = VarNamesLike.convert(var_names) if var_names else var_names

     paths = []
     if time_range:
         for file_path, coverage in list(self._files.items()):
             if not coverage:
                 # Files without time information cannot be matched against a range.
                 continue
             if isinstance(coverage, Tuple):
                 # (start, end) coverage must lie completely inside the range.
                 in_range = coverage[0] >= time_range[0] and coverage[1] <= time_range[1]
             elif isinstance(coverage, datetime):
                 # Single time stamp: start inclusive, end exclusive.
                 in_range = time_range[0] <= coverage < time_range[1]
             else:
                 in_range = False
             if in_range:
                 paths.extend(self._resolve_file_path(file_path))
     else:
         for file_path, _ in self._files.items():
             paths.extend(self._resolve_file_path(file_path))

     if not paths:
         if time_range:
             raise DataAccessError(
                 "No local datasets available for\nspecified time range {}".format(
                     TimeRangeLike.format(time_range)),
                 source=self)
         raise DataAccessError("No local datasets available", source=self)

     # De-duplicate and order the resolved paths before opening them.
     paths = sorted(set(paths))
     try:
         ds = open_xarray_dataset(paths)
         if region:
             ds = subset_spatial_impl(normalize_impl(ds), region)
         if var_names:
             unwanted = [name for name in ds.data_vars.keys() if name not in var_names]
             ds = ds.drop(unwanted)
         return ds
     except OSError as e:
         if time_range:
             message = "Cannot open local dataset for time range {}:\n{}".format(
                 TimeRangeLike.format(time_range), e)
         else:
             message = "Cannot open local dataset:\n{}".format(e)
         raise DataAccessError(message, source=self) from e
Пример #8
0
 def open_dataset(self,
                  time_range: TimeRangeLike.TYPE = None,
                  region: PolygonLike.TYPE = None,
                  var_names: VarNamesLike.TYPE = None,
                  protocol: str = None) -> Any:
     """
     Open the locally existing files of this data source as one dataset.

     :param time_range: optional time range used to resolve file paths; without
            it, ``(None, None)`` is passed to ``resolve_paths``
     :param region: not used by this implementation
     :param var_names: not used by this implementation
     :param protocol: not used by this implementation
     :return: the result of ``open_xarray_dataset`` for the existing files
     :raise ValueError: if none of the resolved paths exists on disk
     """
     if time_range:
         resolved_range = TimeRangeLike.convert(time_range)
     else:
         resolved_range = (None, None)

     # Drop duplicates, then keep only the paths that are present locally.
     candidates = set(self.resolve_paths(resolved_range))
     existing_paths = [path for path in candidates if os.path.exists(path)]
     if not existing_paths:
         raise ValueError('No local file available. Consider syncing the dataset.')
     return open_xarray_dataset(existing_paths)
Пример #9
0
 def open_dataset(self,
                  time_range: TimeRangeLike.TYPE = None,
                  region: PolygonLike.TYPE = None,
                  var_names: VarNamesLike.TYPE = None,
                  protocol: str = None,
                  monitor: Monitor = Monitor.NONE) -> Any:
     """
     Open the locally existing files of this data source as one dataset.

     :param time_range: optional time range used to resolve file paths; without
            it, ``(None, None)`` is passed to ``resolve_paths``
     :param region: forwarded unchanged to ``open_xarray_dataset``
     :param var_names: forwarded unchanged to ``open_xarray_dataset``
     :param protocol: not used by this implementation
     :param monitor: forwarded to ``open_xarray_dataset``
     :return: the result of ``open_xarray_dataset`` for the existing files
     :raise ValueError: if none of the resolved paths exists on disk
     """
     if time_range:
         resolved_range = TimeRangeLike.convert(time_range)
     else:
         resolved_range = (None, None)

     # Drop duplicates, then keep only the paths that are present locally.
     candidates = set(self.resolve_paths(resolved_range))
     existing_paths = [path for path in candidates if os.path.exists(path)]
     if not existing_paths:
         raise ValueError('No local file available. Consider syncing the dataset.')
     return open_xarray_dataset(existing_paths,
                                region=region,
                                var_names=var_names,
                                monitor=monitor)
Пример #10
0
    def open_dataset(self,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     var_names: VarNamesLike.TYPE = None,
                     protocol: str = None,
                     monitor: Monitor = Monitor.NONE) -> Any:
        """
        Open the files registered for this data source as one dataset.

        :param time_range: optional time range; only files whose registered time
               coverage lies within it are opened
        :param region: forwarded unchanged to ``open_xarray_dataset``
        :param var_names: optional variable names, converted and forwarded to
               ``open_xarray_dataset``
        :param protocol: not used by this implementation
        :param monitor: forwarded to ``open_xarray_dataset``
        :return: the result of ``open_xarray_dataset``
        :raise: the error built by ``_empty_error`` if no file matches, or the one
                built by ``_cannot_access_error`` if opening fails
        """
        time_range = TimeRangeLike.convert(time_range) if time_range else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        paths = []
        if time_range:
            for file_path, coverage in list(self._files.items()):
                if not coverage:
                    # Files without time information cannot be matched against a range.
                    continue
                if isinstance(coverage, Tuple):
                    # (start, end) coverage must lie completely inside the range.
                    in_range = coverage[0] >= time_range[0] and coverage[1] <= time_range[1]
                elif isinstance(coverage, datetime):
                    # Single time stamp: start inclusive, end exclusive.
                    in_range = time_range[0] <= coverage < time_range[1]
                else:
                    in_range = False
                if in_range:
                    paths.extend(self._resolve_file_path(file_path))
        else:
            for file_path, _ in self._files.items():
                paths.extend(self._resolve_file_path(file_path))

        if not paths:
            raise self._empty_error(time_range)

        paths = sorted(set(paths))
        try:
            excluded_variables = self._meta_info.get('exclude_variables')
            drop_variables = ([entry.get('name') for entry in excluded_variables]
                              if excluded_variables else None)
            # TODO: combine var_names and drop_variables
            return open_xarray_dataset(paths, region=region, var_names=var_names,
                                       drop_variables=drop_variables, monitor=monitor)
        # HTTPError is handled before the URLError clause so that it is not
        # reported with error_cls=NetworkError.
        except HTTPError as e:
            raise self._cannot_access_error(
                time_range, region, var_names, verb="open", cause=e) from e
        except (URLError, socket.timeout) as e:
            raise self._cannot_access_error(
                time_range, region, var_names, verb="open", cause=e,
                error_cls=NetworkError) from e
        except OSError as e:
            raise self._cannot_access_error(
                time_range, region, var_names, verb="open", cause=e) from e
Пример #11
0
 def open_dataset(self,
                  time_range: TimeRangeLike.TYPE = None,
                  region: PolygonLike.TYPE = None,
                  var_names: VarNamesLike.TYPE = None,
                  protocol: str = None) -> Any:
     """
     Open the local files of this data source as one dataset.

     :param time_range: optional time range; only files whose registered time
            coverage lies within it are opened
     :param region: optional region; if given, the dataset is sliced along
            ``lat`` and ``lon`` using the region's bounds
     :param var_names: optional names of the data variables to keep; all other
            data variables are dropped, coordinate variables are kept
     :param protocol: not used by this implementation
     :return: the opened (and optionally subsetted) dataset, or ``None`` if no
              file matches the request
     :raise IOError: if opening the files fails with an ``OSError``
     """
     time_range = TimeRangeLike.convert(time_range) if time_range else None
     if region:
         region = PolygonLike.convert(region)
     if var_names:
         var_names = VarNamesLike.convert(var_names)

     paths = []
     if time_range:
         for file_path, coverage in list(self._files.items()):
             if not coverage:
                 # Files without time information cannot be matched against a range.
                 continue
             if isinstance(coverage, Tuple):
                 # (start, end) coverage must lie completely inside the range.
                 in_range = coverage[0] >= time_range[0] and coverage[1] <= time_range[1]
             elif isinstance(coverage, datetime):
                 # Single time stamp: start inclusive, end exclusive.
                 in_range = time_range[0] <= coverage < time_range[1]
             else:
                 in_range = False
             if in_range:
                 paths.extend(self._resolve_file_path(file_path))
     else:
         for file_path, _ in self._files.items():
             paths.extend(self._resolve_file_path(file_path))

     if not paths:
         return None

     paths = sorted(set(paths))
     try:
         ds = open_xarray_dataset(paths)
         if region:
             # NOTE(review): this unpacking assumes bounds is ordered
             # (lat_min, lon_min, lat_max, lon_max). If PolygonLike.convert yields
             # a shapely geometry, bounds is (minx, miny, maxx, maxy) - confirm
             # the axis order before relying on the spatial subset.
             [lat_min, lon_min, lat_max, lon_max] = region.bounds
             ds = ds.sel(drop=False,
                         lat=slice(lat_min, lat_max),
                         lon=slice(lon_min, lon_max))
         if var_names:
             # Filter over data_vars only: ds.variables also contains the
             # coordinate variables, which must not be dropped here.
             ds = ds.drop([
                 var_name for var_name in ds.data_vars.keys()
                 if var_name not in var_names
             ])
         return ds
     except OSError as e:
         raise IOError("Files: {} caused:\nOSError({}): {}".format(
             paths, e.errno, e.strerror)) from e
Пример #12
0
    def open_dataset(self,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     var_names: VarNamesLike.TYPE = None,
                     protocol: str = None) -> Any:
        """
        Open the remote files of this data source as one dataset.

        :param time_range: optional time range used to select the files to open
        :param region: optional region; if given, the dataset is sliced along
               ``lat`` and ``lon`` using the region's bounds
        :param var_names: optional names of the data variables to keep; all other
               data variables are dropped, coordinate variables are kept
        :param protocol: not used by this implementation
        :return: the opened (and optionally subsetted) dataset
        :raise IOError: if no files are found for the request, or if opening the
               files fails with an ``OSError``
        """
        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        selected_file_list = self._find_files(time_range)
        if not selected_file_list:
            msg = 'Data source \'{}\' does not seem to have any data files'.format(
                self.name)
            if time_range is not None:
                msg += ' in given time range {}'.format(
                    TimeRangeLike.format(time_range))
            raise IOError(msg)

        files = self._get_urls_list(selected_file_list, _ODP_PROTOCOL_OPENDAP)
        try:
            ds = open_xarray_dataset(files)
            if region:
                # NOTE(review): this unpacking assumes bounds is ordered
                # (lat_min, lon_min, lat_max, lon_max). If PolygonLike.convert yields
                # a shapely geometry, bounds is (minx, miny, maxx, maxy) - confirm
                # the axis order before relying on the spatial subset.
                [lat_min, lon_min, lat_max, lon_max] = region.bounds
                ds = ds.sel(drop=False,
                            lat=slice(lat_min, lat_max),
                            lon=slice(lon_min, lon_max))
            if var_names:
                # Filter over data_vars only: ds.variables also contains the
                # coordinate variables, which must not be dropped here.
                ds = ds.drop([
                    var_name for var_name in ds.data_vars.keys()
                    if var_name not in var_names
                ])
            return ds

        except OSError as e:
            raise IOError("Files: {} caused:\nOSError({}): {}".format(
                files, e.errno, e.strerror)) from e
Пример #13
0
    def open_dataset(self,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     var_names: VarNamesLike.TYPE = None,
                     protocol: str = None,
                     monitor: Monitor = Monitor.NONE) -> Any:
        """
        Open the files registered for this data source as one dataset.

        :param time_range: optional time range; only files whose registered time
               coverage lies within it are opened
        :param region: forwarded unchanged to ``open_xarray_dataset``
        :param var_names: optional variable names, converted and forwarded to
               ``open_xarray_dataset``
        :param protocol: not used by this implementation
        :param monitor: forwarded to ``open_xarray_dataset``
        :return: the result of ``open_xarray_dataset``
        :raise: the error built by ``_empty_error`` if no file matches, or the one
                built by ``_cannot_access_error`` if opening fails
        """
        time_range = TimeRangeLike.convert(time_range) if time_range else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        paths = []
        if time_range:
            for path_key, time_info in list(self._files.items()):
                if not time_info:
                    # No time information registered for this file: skip it.
                    continue
                if isinstance(time_info, Tuple):
                    # (start, end) coverage must lie completely inside the range.
                    matches = time_info[0] >= time_range[0] and time_info[1] <= time_range[1]
                elif isinstance(time_info, datetime):
                    # Single time stamp: start inclusive, end exclusive.
                    matches = time_range[0] <= time_info < time_range[1]
                else:
                    matches = False
                if matches:
                    paths.extend(self._resolve_file_path(path_key))
        else:
            for path_key, _ in self._files.items():
                paths.extend(self._resolve_file_path(path_key))

        if not paths:
            raise self._empty_error(time_range)

        paths = sorted(set(paths))
        try:
            excluded_variables = self._meta_info.get('exclude_variables')
            if excluded_variables:
                drop_variables = [item.get('name') for item in excluded_variables]
            else:
                drop_variables = None
            # TODO: combine var_names and drop_variables
            return open_xarray_dataset(paths, region=region, var_names=var_names,
                                       drop_variables=drop_variables, monitor=monitor)
        # HTTPError is handled before the URLError clause so that it is not
        # reported with error_cls=NetworkError.
        except HTTPError as e:
            raise self._cannot_access_error(
                time_range, region, var_names, verb="open", cause=e) from e
        except (URLError, socket.timeout) as e:
            raise self._cannot_access_error(
                time_range, region, var_names, verb="open", cause=e,
                error_cls=NetworkError) from e
        except OSError as e:
            raise self._cannot_access_error(
                time_range, region, var_names, verb="open", cause=e) from e