Example #1
    def __init__(self,
                 local_ftp: str = "",
                 ds: str = "",
                 cache: bool = False,
                 cachedir: str = "",
                 dimension: str = 'point',
                 **kwargs):
        """ Init fetcher

            Parameters
            ----------
            local_ftp : str
                Path to the local directory where the 'dac' folder is located.
            ds : str
                Name of the dataset to load. Uses the global OPTIONS['dataset'] by default.
            cache : bool
                Determines whether retrieved data should be cached locally (default: False).
            cachedir : str
                Absolute path to the cache directory.
            dimension : str
                Main dimension of the output dataset. This can be "profile" to retrieve a collection of
                profiles, or "point" (default) to have data as a collection of measurements.
                This can be used to optimise performance.

        """
        self.cache = cache
        self.cachedir = cachedir
        self.fs = filestore(cache=self.cache, cachedir=self.cachedir)
        self.definition = 'Local ftp Argo data fetcher'
        self.dataset_id = OPTIONS['dataset'] if ds == '' else ds
        self.local_ftp = OPTIONS['local_ftp'] if local_ftp == '' else local_ftp
        self.init(**kwargs)
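In practice this initializer is reached through argopy's public interface rather than called directly. Below is a minimal usage sketch, assuming this argopy version provides the 'localftp' data source; the local path and float WMO are illustrative:

import argopy
from argopy import DataFetcher as ArgoDataFetcher

# Point argopy at a local copy of the GDAC tree (the directory containing 'dac/'):
argopy.set_options(local_ftp="/path/to/local/gdac")  # illustrative path

# Fetch one float as a collection of measurements (dimension='point' by default):
ds = ArgoDataFetcher(src="localftp").float(5900446).to_xarray()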
Example #2
 def test_cachefile(self):
     fs = filestore(cache=True, cachedir=self.testcachedir)
     try:
         fs.open_dataframe(self.csvfile, skiprows=8, header=0)
         assert isinstance(fs.cachepath(self.csvfile), str)
     finally:
         # Clean up the cache directory whether the test passed or failed
         shutil.rmtree(self.testcachedir)
Example #3
 def test_clear_cache(self):
     # Create dummy data to read and cache:
     uri = os.path.abspath("dummy_fileA.txt")
     with open(uri, "w") as fp:
         fp.write('Hello world!')
     # Create store:
     fs = filestore(cache=True, cachedir=self.testcachedir)
     # Then we read some dummy data from the dummy file to trigger caching
     with fs.open(uri, "r") as fp:
         fp.read()
     assert isinstance(fs.cachepath(uri), str)
     # Now, we can clear the cache:
     fs.clear_cache()
     # And verify it does not exist anymore:
     with pytest.raises(CacheFileNotFound):
         fs.cachepath(uri)
     os.remove(uri)  # Delete the dummy file
Example #4
 def test_open_mfdataset(self):
     fs = filestore()
     ncfiles = fs.glob(
         os.path.sep.join(
             [self.ftproot, "dac/aoml/5900446/profiles/*_1*.nc"]))[0:2]
     for method in ["seq", "thread", "process"]:
         for progress in [True, False]:
             assert isinstance(
                 fs.open_mfdataset(ncfiles,
                                   method=method,
                                   progress=progress),
                 xr.Dataset,
             )
             assert is_list_of_datasets(
                 fs.open_mfdataset(ncfiles,
                                   method=method,
                                   progress=progress,
                                   concat=False))
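For reference, here is a minimal sketch of calling open_mfdataset outside the test harness, assuming filestore is importable from argopy.stores (as in the argopy test suite); the glob pattern is illustrative:

import xarray as xr
from argopy.stores import filestore

fs = filestore()
ncfiles = fs.glob("/path/to/local/gdac/dac/aoml/5900446/profiles/*.nc")
# method="thread" loads files concurrently; with concat=True (the default),
# the result is a single concatenated dataset:
ds = fs.open_mfdataset(ncfiles, method="thread", progress=True)
assert isinstance(ds, xr.Dataset)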
Example #5
 def test_clear_cache(self):
     with tempfile.TemporaryDirectory() as cachedir:
         # Create dummy data to read and cache:
         uri = os.path.abspath("dummy_fileA.txt")
         with open(uri, "w") as fp:
             fp.write("Hello world!")
         # Create store:
         fs = filestore(cache=True, cachedir=cachedir)
         # Then we read some dummy data from the dummy file to trigger caching
         with fs.open(uri, "r") as fp:
             fp.read()
         assert isinstance(fs.cachepath(uri), str)
         assert os.path.isfile(fs.cachepath(uri))
         # Now, we can clear the cache:
         fs.clear_cache()
         # And verify it does not exist anymore:
         with pytest.raises(CacheFileNotFound):
             fs.cachepath(uri)
         os.remove(uri)  # Delete dummy file
Example #6
    def __init__(self,
                 local_ftp: str = "",
                 ds: str = "",
                 cache: bool = False,
                 cachedir: str = "",
                 dimension: str = 'point',
                 errors: str = 'raise',
                 parallel: bool = False,
                 parallel_method: str = 'thread',
                 progress: bool = False,
                 chunks: str = 'auto',
                 chunks_maxsize: dict = {},
                 **kwargs):
        """ Init fetcher

        Parameters
        ----------
        local_ftp: str (optional)
            Path to the local directory where the 'dac' folder is located.
        ds: str (optional)
            Dataset to load: 'phy' or 'ref' or 'bgc'
        errors: str (optional)
            If set to 'raise' (default), a NetCDF4FileNotFoundError is raised if any of the requested
            files cannot be found. If set to 'ignore', missing files are skipped when fetching data.
        cache: bool (optional)
            Whether to cache fetched data (default: False)
        cachedir: str (optional)
            Path to cache folder
        dimension: str
            Main dimension of the output dataset. This can be "profile" to retrieve a collection of
            profiles, or "point" (default) to have data as a collection of measurements.
            This can be used to optimise performance.
        parallel: bool (optional)
            Chunk the request to use parallel fetching (default: False)
        parallel_method: str (optional)
            Define the parallelization method: ``thread``, ``process`` or a :class:`dask.distributed.client.Client`.
        progress: bool (optional)
            Whether to show a progress bar when fetching data.
        chunks: 'auto' or dict of integers (optional)
            Dictionary with request access point as keys and number of chunks to create as values.
            E.g.:

                - ``{'wmo': 10}`` will create a maximum of 10 chunks along WMOs when used with ``Fetch_wmo``.
                - ``{'lon': 2}`` will create a maximum of 2 chunks along longitude when used with ``Fetch_box``.

        chunks_maxsize: dict (optional)
            Dictionary with request access point as keys and chunk size as values (used as maximum values in
            'auto' chunking).
            E.g.: ``{'wmo': 5}`` will create chunks with at most 5 WMOs each.
        """
        self.cache = cache
        self.cachedir = cachedir
        self.fs = filestore(cache=self.cache, cachedir=self.cachedir)
        self.errors = errors

        if not isinstance(parallel, bool):
            # The parallelization method is passed through the argument 'parallel':
            parallel_method = parallel
            if parallel in ['thread', 'process']:
                parallel = True
        if parallel_method not in ["thread", "process"]:
            raise ValueError("localftp only support multi-threading and processing ('%s' unknown)" % parallel_method)
        self.parallel = parallel
        self.parallel_method = parallel_method
        self.progress = progress
        self.chunks = chunks
        self.chunks_maxsize = chunks_maxsize

        self.definition = 'Local ftp Argo data fetcher'
        self.dataset_id = OPTIONS['dataset'] if ds == '' else ds

        self.local_ftp = OPTIONS['local_ftp'] if local_ftp == '' else local_ftp
        check_localftp(self.local_ftp, errors='raise')  # Validate local_ftp

        self.init(**kwargs)
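A sketch of how these parallelization options can surface through the public fetcher, assuming argopy forwards fetcher-specific keywords to this backend; the WMOs below are illustrative:

from argopy import DataFetcher as ArgoDataFetcher

fetcher = ArgoDataFetcher(src="localftp",
                          parallel=True,               # chunk the request
                          parallel_method="thread",    # or "process"
                          progress=True,               # show a progress bar
                          chunks_maxsize={"wmo": 5})   # at most 5 WMOs per chunk
ds = fetcher.float([6902745, 6902746]).to_xarray()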
Example #7
 def test_open_dataframe(self):
     fs = filestore()
     assert isinstance(
         fs.open_dataframe(self.csvfile, skiprows=8, header=0),
         pd.core.frame.DataFrame)
Example #8
 def test_open_dataset(self):
     ncfile = os.path.sep.join(
         [self.ftproot, "dac/aoml/5900446/5900446_prof.nc"])
     fs = filestore()
     assert isinstance(fs.open_dataset(ncfile), xr.Dataset)
Example #9
 def test_nocachefile(self):
     fs = filestore(cache=True)
     with pytest.raises(CacheFileNotFound):
         fs.cachepath("dummy_uri")
Example #10
 def test_cache(self):
     fs = filestore(cache=True)
     assert isinstance(
         fs.fs, fsspec.implementations.cached.WholeFileCacheFileSystem)
Example #11
 def test_nocache(self):
     fs = filestore(cache=False)
     with pytest.raises(FileSystemHasNoCache):
         fs.cachepath("dummy_uri")
Example #12
 def test_creation(self):
     fs = filestore(cache=False)
     assert isinstance(fs.fs, fsspec.implementations.local.LocalFileSystem)
Example #13
 def test_cachefile(self):
     with tempfile.TemporaryDirectory() as cachedir:
         fs = filestore(cache=True, cachedir=cachedir)
         fs.read_csv(self.csvfile, skiprows=8, header=0)
         assert isinstance(fs.cachepath(self.csvfile), str)
Example #14
 def test_read_csv(self):
     fs = filestore()
     assert isinstance(fs.read_csv(self.csvfile, skiprows=8, header=0),
                       pd.core.frame.DataFrame)
Example #15
 def test_glob(self):
     fs = filestore()
     assert isinstance(fs.glob(os.path.sep.join([self.ftproot, "dac/*"])),
                       list)