def test_open_xarray_dataset(self):
    """Open the large and small test datasets and check monitor records and chunking."""
    large_glob = os.path.join(_TEST_DATA_PATH, 'large', '*.nc')
    small_glob = os.path.join(_TEST_DATA_PATH, 'small', '*.nc')
    monitor_large = RecordingMonitor()
    monitor_small = RecordingMonitor()
    dataset_large = open_xarray_dataset(large_glob, monitor=monitor_large)
    dataset_small = open_xarray_dataset(small_glob, monitor=monitor_small)

    # Both monitors must have seen the full start/progress/done cycle.
    expected_records = [('start', 'Opening dataset', 1),
                        ('progress', 1, None, 100),
                        ('done',)]
    self.assertEqual(monitor_large.records, expected_records)
    self.assertEqual(monitor_small.records, expected_records)

    # Chunk sizes must match the file dimensions.
    self.assertEqual(dataset_small.chunks,
                     {'lon': (1440,), 'lat': (720,), 'time': (1,)})
    self.assertEqual(dataset_large.chunks,
                     {'lon': (7200,), 'lat': (3600,), 'time': (1,), 'bnds': (2,)})
def test_autochunking(self):
    """Verify the automatically chosen chunk sizes for the small and large test datasets."""
    large_glob = op.join(_TEST_DATA_PATH, 'large', '*.nc')
    small_glob = op.join(_TEST_DATA_PATH, 'small', '*.nc')
    dataset_large = ds.open_xarray_dataset(large_glob)
    dataset_small = ds.open_xarray_dataset(small_glob)
    # The large dataset gets split chunks along lat/lon; the small one does not.
    self.assertEqual(dataset_small.chunks,
                     {'lat': (720,), 'time': (1,), 'lon': (1440,)})
    self.assertEqual(dataset_large.chunks,
                     {'lat': (1800, 1800), 'time': (1,), 'bnds': (2,),
                      'lon': (3600, 3600)})
def test_open_xarray(self):
    """open_xarray_dataset must raise IOError when no path resolves, and still
    open a dataset when at least one path is valid.

    BUG FIX: the original used ``try/except IOError`` around the call, which
    silently passed when no exception was raised at all; ``assertRaises``
    makes the missing exception a test failure.
    """
    wrong_path = os.path.join(_TEST_DATA_PATH, 'small', '*.nck')
    wrong_url = 'httpz://www.acme.com'
    path = [wrong_path, wrong_url]
    with self.assertRaises(IOError) as cm:
        open_xarray_dataset(path)
    self.assertEqual(str(cm.exception), 'File {} not found'.format(path))

    # One resolvable path is enough for the open to succeed.
    right_path = os.path.join(_TEST_DATA_PATH, 'small', '*.nc')
    path = [right_path, wrong_url]
    dsa = open_xarray_dataset(path)
    self.assertIsNotNone(dsa)
def test_open_xarray(self):
    """Check error and success behavior of open_xarray_dataset on bad/good paths.

    BUG FIX: the original wrapped the call in ``try/except IOError`` and only
    asserted inside the handler, so the test passed silently if no exception
    was raised; ``assertRaises`` enforces that the IOError actually occurs.
    """
    wrong_path = os.path.join(_TEST_DATA_PATH, 'small', '*.nck')
    wrong_url = 'httpz://www.acme.com'
    path = [wrong_path, wrong_url]
    with self.assertRaises(IOError) as cm:
        open_xarray_dataset(path)
    self.assertEqual(str(cm.exception), 'File {} not found'.format(path))

    # With at least one valid path, opening must succeed.
    right_path = os.path.join(_TEST_DATA_PATH, 'small', '*.nc')
    path = [right_path, wrong_url]
    dsa = open_xarray_dataset(path)
    self.assertIsNotNone(dsa)
def test_open_xarray_dataset(self):
    """Open both test datasets and verify monitor progress records and chunking."""
    monitors = {}
    datasets = {}
    # Open 'large' first, then 'small', mirroring the original call order.
    for size in ('large', 'small'):
        monitors[size] = RecordingMonitor()
        glob_pattern = os.path.join(_TEST_DATA_PATH, size, '*.nc')
        datasets[size] = open_xarray_dataset(glob_pattern, monitor=monitors[size])

    # Every open must report start, a single progress step, then done.
    for size in ('large', 'small'):
        self.assertEqual(monitors[size].records,
                         [('start', 'Opening dataset', 1),
                          ('progress', 1, None, 100),
                          ('done',)])

    self.assertEqual(datasets['small'].chunks,
                     {'lon': (1440,), 'lat': (720,), 'time': (1,)})
    self.assertEqual(datasets['large'].chunks,
                     {'lon': (7200,), 'lat': (3600,), 'time': (1,), 'bnds': (2,)})
def open_dataset(self,
                 time_range: TimeRangeLike.TYPE = None,
                 region: PolygonLike.TYPE = None,
                 var_names: VarNamesLike.TYPE = None,
                 protocol: str = None) -> Any:
    """Open the remote (OPeNDAP) dataset of this CCI ODP data source.

    :param time_range: optional time range used to select files
    :param region: optional polygon for spatial subsetting
    :param var_names: optional variable names to retain
    :param protocol: unused here; kept for interface compatibility
    :return: an xarray dataset
    :raise DataAccessError: when no files match or the remote dataset cannot be opened
    """
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    var_names = VarNamesLike.convert(var_names) if var_names else None
    selected_file_list = self._find_files(time_range)
    if not selected_file_list:
        msg = 'CCI Open Data Portal data source "{}"\ndoes not seem to have any datasets'.format(self.id)
        if time_range is not None:
            msg += ' in given time range {}'.format(TimeRangeLike.format(time_range))
        raise DataAccessError(msg)
    files = self._get_urls_list(selected_file_list, _ODP_PROTOCOL_OPENDAP)
    try:
        ds = open_xarray_dataset(files)
        if region:
            # Normalize first so spatial subsetting sees canonical coordinates.
            ds = normalize_impl(ds)
            ds = subset_spatial_impl(ds, region)
        if var_names:
            # Drop every data variable not explicitly requested.
            ds = ds.drop([var_name for var_name in ds.data_vars.keys()
                          if var_name not in var_names])
        return ds
    except OSError as e:
        if time_range:
            raise DataAccessError("Cannot open remote dataset for time range {}:\n"
                                  "{}"
                                  .format(TimeRangeLike.format(time_range), e),
                                  source=self) from e
        else:
            # BUG FIX: the original formatted the (absent) time range into the
            # message and discarded the underlying error; report the error instead.
            raise DataAccessError("Cannot open remote dataset:\n"
                                  "{}"
                                  .format(e),
                                  source=self) from e
def open_dataset(self,
                 time_range: TimeRangeLike.TYPE = None,
                 region: PolygonLike.TYPE = None,
                 var_names: VarNamesLike.TYPE = None,
                 protocol: str = None) -> Any:
    """Open this local data source as an xarray dataset.

    Selects the files whose recorded time (a single datetime or a
    (start, end) tuple) falls within *time_range* — or all files when no
    range is given — opens them as one dataset, then optionally subsets by
    *region* and *var_names*.

    :param time_range: optional time range used to filter files
    :param region: optional polygon used for spatial subsetting
    :param var_names: optional variable names to retain
    :param protocol: unused here; kept for interface compatibility
    :return: the opened (possibly subset) dataset
    :raise DataAccessError: when no files match or opening fails
    """
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    if region:
        region = PolygonLike.convert(region)
    if var_names:
        var_names = VarNamesLike.convert(var_names)
    paths = []
    if time_range:
        time_series = list(self._files.values())
        file_paths = list(self._files.keys())
        for i in range(len(time_series)):
            if time_series[i]:
                # A file's time entry is either a (start, end) interval that
                # must lie fully inside time_range, or a single datetime that
                # must fall within [start, end).
                if isinstance(time_series[i], Tuple) and \
                        time_series[i][0] >= time_range[0] and \
                        time_series[i][1] <= time_range[1]:
                    paths.extend(self._resolve_file_path(file_paths[i]))
                elif isinstance(
                        time_series[i], datetime
                ) and time_range[0] <= time_series[i] < time_range[1]:
                    paths.extend(self._resolve_file_path(file_paths[i]))
    else:
        # No time range: include every file of this data source.
        for file in self._files.items():
            paths.extend(self._resolve_file_path(file[0]))
    if paths:
        # De-duplicate and keep a stable, sorted order.
        paths = sorted(set(paths))
        try:
            ds = open_xarray_dataset(paths)
            if region:
                # Normalize before spatial subsetting.
                ds = normalize_impl(ds)
                ds = subset_spatial_impl(ds, region)
            if var_names:
                # Drop every data variable not explicitly requested.
                ds = ds.drop([
                    var_name for var_name in ds.data_vars.keys()
                    if var_name not in var_names
                ])
            return ds
        except OSError as e:
            # Preserve the time-range context in the error message when available.
            if time_range:
                raise DataAccessError(
                    "Cannot open local dataset for time range {}:\n"
                    "{}".format(TimeRangeLike.format(time_range), e),
                    source=self) from e
            else:
                raise DataAccessError("Cannot open local dataset:\n"
                                      "{}".format(e), source=self) from e
    else:
        if time_range:
            raise DataAccessError(
                "No local datasets available for\nspecified time range {}".
                format(TimeRangeLike.format(time_range)), source=self)
        else:
            raise DataAccessError("No local datasets available", source=self)
def open_dataset(self,
                 time_range: TimeRangeLike.TYPE = None,
                 region: PolygonLike.TYPE = None,
                 var_names: VarNamesLike.TYPE = None,
                 protocol: str = None) -> Any:
    """Open the locally synced files of this data source as an xarray dataset.

    Note: *region*, *var_names* and *protocol* are accepted for interface
    compatibility but are not applied by this implementation.

    :raise ValueError: when no matching local file exists on disk
    """
    requested_range = TimeRangeLike.convert(time_range) if time_range else (None, None)
    # De-duplicate resolved paths, then keep only files that actually exist.
    candidates = set(self.resolve_paths(requested_range))
    local_files = [path for path in candidates if os.path.exists(path)]
    if not local_files:
        raise ValueError('No local file available. Consider syncing the dataset.')
    return open_xarray_dataset(local_files)
def open_dataset(self,
                 time_range: TimeRangeLike.TYPE = None,
                 region: PolygonLike.TYPE = None,
                 var_names: VarNamesLike.TYPE = None,
                 protocol: str = None,
                 monitor: Monitor = Monitor.NONE) -> Any:
    """Open the locally synced files of this data source as an xarray dataset,
    forwarding *region*, *var_names* and *monitor* to open_xarray_dataset.

    :raise ValueError: when no matching local file exists on disk
    """
    requested_range = TimeRangeLike.convert(time_range) if time_range else (None, None)
    # De-duplicate resolved paths and keep only files present on disk.
    local_files = [path
                   for path in set(self.resolve_paths(requested_range))
                   if os.path.exists(path)]
    if not local_files:
        raise ValueError('No local file available. Consider syncing the dataset.')
    return open_xarray_dataset(local_files,
                               region=region,
                               var_names=var_names,
                               monitor=monitor)
def open_dataset(self,
                 time_range: TimeRangeLike.TYPE = None,
                 region: PolygonLike.TYPE = None,
                 var_names: VarNamesLike.TYPE = None,
                 protocol: str = None,
                 monitor: Monitor = Monitor.NONE) -> Any:
    """Open this data source's files as a single xarray dataset.

    Files are selected by *time_range* (each file's time is a single datetime
    or a (start, end) tuple), or all files are taken when no range is given.
    Variables listed under the meta-info key 'exclude_variables' are dropped
    when opening.

    :param time_range: optional time range used to filter files
    :param region: optional spatial constraint, forwarded to open_xarray_dataset
    :param var_names: optional variable names to retain, forwarded as well
    :param protocol: unused here; kept for interface compatibility
    :param monitor: progress monitor, forwarded to open_xarray_dataset
    :return: the opened dataset
    :raise DataAccessError: when no files match or access fails
    :raise NetworkError: for network-level failures (URL errors, timeouts)
    """
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    var_names = VarNamesLike.convert(var_names) if var_names else None
    paths = []
    if time_range:
        time_series = list(self._files.values())
        file_paths = list(self._files.keys())
        for i in range(len(time_series)):
            if time_series[i]:
                # Interval entries must lie fully inside time_range;
                # single datetimes must fall within [start, end).
                if isinstance(time_series[i], Tuple) and \
                    time_series[i][0] >= time_range[0] and \
                    time_series[i][1] <= time_range[1]:
                    paths.extend(self._resolve_file_path(file_paths[i]))
                elif isinstance(time_series[i], datetime) and time_range[0] <= time_series[i] < time_range[1]:
                    paths.extend(self._resolve_file_path(file_paths[i]))
    else:
        # No time range given: take every file of this data source.
        for file in self._files.items():
            paths.extend(self._resolve_file_path(file[0]))
    if not paths:
        raise self._empty_error(time_range)
    # De-duplicate and keep a stable, sorted order.
    paths = sorted(set(paths))
    try:
        excluded_variables = self._meta_info.get('exclude_variables')
        if excluded_variables:
            drop_variables = [variable.get('name') for variable in excluded_variables]
        else:
            drop_variables = None
        # TODO: combine var_names and drop_variables
        return open_xarray_dataset(paths,
                                   region=region,
                                   var_names=var_names,
                                   drop_variables=drop_variables,
                                   monitor=monitor)
    except HTTPError as e:
        raise self._cannot_access_error(time_range, region, var_names,
                                        verb="open", cause=e) from e
    except (URLError, socket.timeout) as e:
        # Network-level failures are reported with a dedicated error class.
        raise self._cannot_access_error(time_range, region, var_names,
                                        verb="open", cause=e,
                                        error_cls=NetworkError) from e
    except OSError as e:
        raise self._cannot_access_error(time_range, region, var_names,
                                        verb="open", cause=e) from e
def open_dataset(self,
                 time_range: TimeRangeLike.TYPE = None,
                 region: PolygonLike.TYPE = None,
                 var_names: VarNamesLike.TYPE = None,
                 protocol: str = None) -> Any:
    """Open this local data source as an xarray dataset.

    :param time_range: optional time range used to filter files
    :param region: optional polygon; the dataset is subset to its bounding box
    :param var_names: optional variable names to retain
    :param protocol: unused here; kept for interface compatibility
    :return: the opened dataset, or None when no files match
    :raise IOError: when the matched files cannot be opened
    """
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    if region:
        region = PolygonLike.convert(region)
    if var_names:
        var_names = VarNamesLike.convert(var_names)
    paths = []
    if time_range:
        time_series = list(self._files.values())
        file_paths = list(self._files.keys())
        for i in range(len(time_series)):
            if time_series[i]:
                # Interval entries must lie fully inside time_range;
                # single datetimes must fall within [start, end).
                if isinstance(time_series[i], Tuple) and \
                        time_series[i][0] >= time_range[0] and \
                        time_series[i][1] <= time_range[1]:
                    paths.extend(self._resolve_file_path(file_paths[i]))
                elif isinstance(time_series[i], datetime) and \
                        time_range[0] <= time_series[i] < time_range[1]:
                    paths.extend(self._resolve_file_path(file_paths[i]))
    else:
        # No time range given: take every file of this data source.
        for file in self._files.items():
            paths.extend(self._resolve_file_path(file[0]))
    if not paths:
        return None
    paths = sorted(set(paths))
    try:
        ds = open_xarray_dataset(paths)
        if region:
            # BUG FIX: shapely's .bounds is (minx, miny, maxx, maxy), i.e.
            # (lon_min, lat_min, lon_max, lat_max); the original unpacked
            # latitude first and thereby swapped the axes.
            [lon_min, lat_min, lon_max, lat_max] = region.bounds
            ds = ds.sel(drop=False,
                        lat=slice(lat_min, lat_max),
                        lon=slice(lon_min, lon_max))
        if var_names:
            # Drop every variable not explicitly requested.
            ds = ds.drop([
                var_name for var_name in ds.variables.keys()
                if var_name not in var_names
            ])
        return ds
    except OSError as e:
        # Chain the original error for easier debugging.
        raise IOError("Files: {} caused:\nOSError({}): {}".format(
            paths, e.errno, e.strerror)) from e
def open_dataset(self,
                 time_range: TimeRangeLike.TYPE = None,
                 region: PolygonLike.TYPE = None,
                 var_names: VarNamesLike.TYPE = None,
                 protocol: str = None) -> Any:
    """Open the remote (OPeNDAP) dataset of this data source.

    :param time_range: optional time range used to select files
    :param region: optional polygon; the dataset is subset to its bounding box
    :param var_names: optional variable names to retain
    :param protocol: unused here; kept for interface compatibility
    :return: an xarray dataset
    :raise IOError: when no files match or opening fails
    """
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    region = PolygonLike.convert(region) if region else None
    var_names = VarNamesLike.convert(var_names) if var_names else None
    selected_file_list = self._find_files(time_range)
    if not selected_file_list:
        msg = 'Data source \'{}\' does not seem to have any data files'.format(
            self.name)
        if time_range is not None:
            msg += ' in given time range {}'.format(
                TimeRangeLike.format(time_range))
        raise IOError(msg)
    files = self._get_urls_list(selected_file_list, _ODP_PROTOCOL_OPENDAP)
    try:
        ds = open_xarray_dataset(files)
        if region:
            # BUG FIX: shapely's .bounds is (minx, miny, maxx, maxy), i.e.
            # (lon_min, lat_min, lon_max, lat_max); the original unpacked
            # latitude first and thereby swapped the axes.
            [lon_min, lat_min, lon_max, lat_max] = region.bounds
            ds = ds.sel(drop=False,
                        lat=slice(lat_min, lat_max),
                        lon=slice(lon_min, lon_max))
        if var_names:
            # Drop every variable not explicitly requested.
            ds = ds.drop([
                var_name for var_name in ds.variables.keys()
                if var_name not in var_names
            ])
        return ds
    except OSError as e:
        # Chain the original error for easier debugging.
        raise IOError("Files: {} caused:\nOSError({}): {}".format(
            files, e.errno, e.strerror)) from e
def open_dataset(self,
                 time_range: TimeRangeLike.TYPE = None,
                 region: PolygonLike.TYPE = None,
                 var_names: VarNamesLike.TYPE = None,
                 protocol: str = None,
                 monitor: Monitor = Monitor.NONE) -> Any:
    """Open this data source's files as one xarray dataset.

    Files are chosen by *time_range* (a file's time is a single datetime or a
    (start, end) tuple); all files are taken when no range is given. Variables
    named in the meta-info key 'exclude_variables' are dropped on open.

    :param time_range: optional time range used to filter files
    :param region: optional spatial constraint, forwarded to open_xarray_dataset
    :param var_names: optional variable names to retain, forwarded as well
    :param protocol: unused here; kept for interface compatibility
    :param monitor: progress monitor, forwarded to open_xarray_dataset
    :return: the opened dataset
    :raise DataAccessError: when no files match or access fails
    :raise NetworkError: for network-level failures (URL errors, timeouts)
    """
    time_range = TimeRangeLike.convert(time_range) if time_range else None
    var_names = VarNamesLike.convert(var_names) if var_names else None
    paths = []
    if time_range:
        time_series = list(self._files.values())
        file_paths = list(self._files.keys())
        for i in range(len(time_series)):
            if time_series[i]:
                # Interval entries must lie fully inside time_range;
                # single datetimes must fall within [start, end).
                if isinstance(time_series[i], Tuple) and \
                        time_series[i][0] >= time_range[0] and \
                        time_series[i][1] <= time_range[1]:
                    paths.extend(self._resolve_file_path(file_paths[i]))
                elif isinstance(
                        time_series[i], datetime
                ) and time_range[0] <= time_series[i] < time_range[1]:
                    paths.extend(self._resolve_file_path(file_paths[i]))
    else:
        # No time range given: take every file of this data source.
        for file in self._files.items():
            paths.extend(self._resolve_file_path(file[0]))
    if not paths:
        raise self._empty_error(time_range)
    # De-duplicate and keep a stable, sorted order.
    paths = sorted(set(paths))
    try:
        excluded_variables = self._meta_info.get('exclude_variables')
        if excluded_variables:
            drop_variables = [
                variable.get('name') for variable in excluded_variables
            ]
        else:
            drop_variables = None
        # TODO: combine var_names and drop_variables
        return open_xarray_dataset(paths,
                                   region=region,
                                   var_names=var_names,
                                   drop_variables=drop_variables,
                                   monitor=monitor)
    except HTTPError as e:
        raise self._cannot_access_error(time_range, region, var_names,
                                        verb="open", cause=e) from e
    except (URLError, socket.timeout) as e:
        # Network-level failures are reported with a dedicated error class.
        raise self._cannot_access_error(time_range, region, var_names,
                                        verb="open", cause=e,
                                        error_cls=NetworkError) from e
    except OSError as e:
        raise self._cannot_access_error(time_range, region, var_names,
                                        verb="open", cause=e) from e