def __init__(self, local_ftp: str = "", index_file: str = "ar_index_global_prof.txt", cache: bool = False, cachedir: str = "", **kwargs):
    """ Init fetcher

    Parameters
    ----------
    local_ftp : str
        Path to the directory with the 'dac' folder and index file
        (default: the 'local_ftp' global option)
    index_file : str
        Name of the Argo index file (default: 'ar_index_global_prof.txt')
    cache : bool
        Cache data or not (default: False)
    cachedir : str
        Path to the cache folder
    """
    self.cache = cache
    self.definition = 'Local ftp Argo index fetcher'
    self.local_ftp = OPTIONS['local_ftp'] if local_ftp == '' else local_ftp
    check_localftp(self.local_ftp, errors='raise')  # Validate local_ftp
    self.index_file = index_file
    # os.path.join (rather than joining on os.path.sep) avoids a doubled
    # separator when local_ftp already ends with one:
    self.fs = indexstore(cache, cachedir, os.path.join(self.local_ftp, self.index_file))
    self.dataset_id = 'index'
    self.init(**kwargs)
def test_check_localftp():
    # 'raise' mode: an invalid path must trigger an FtpPathError
    with pytest.raises(FtpPathError):
        check_localftp("dummy_path", errors='raise')
    # 'warn' mode: an invalid path emits a UserWarning and still returns False
    with pytest.warns(UserWarning):
        assert check_localftp("dummy_path", errors='warn') is False
    # 'ignore' mode: an invalid path silently returns False
    assert check_localftp("dummy_path", errors='ignore') is False
def __init__(self,
             local_ftp: str = "",
             ds: str = "",
             cache: bool = False,
             cachedir: str = "",
             dimension: str = 'point',
             errors: str = 'raise',
             parallel: bool = False,
             parallel_method: str = 'thread',
             progress: bool = False,
             chunks: str = 'auto',
             chunks_maxsize: dict = None,
             **kwargs):
    """ Init fetcher

    Parameters
    ----------
    local_ftp: str (optional)
        Path to the local directory where the 'dac' folder is located.
    ds: str (optional)
        Dataset to load: 'phy' or 'ref' or 'bgc'
    errors: str (optional)
        If set to 'raise' (default), will raise a NetCDF4FileNotFoundError error if any of the requested
        files cannot be found. If set to 'ignore', the file not found is skipped when fetching data.
    cache: bool (optional)
        Cache data or not (default: False)
    cachedir: str (optional)
        Path to cache folder
    dimension: str
        Main dimension of the output dataset. This can be "profile" to retrieve a collection of
        profiles, or "point" (default) to have data as a collection of measurements.
        This can be used to optimise performances.
    parallel: bool (optional)
        Chunk request to use parallel fetching (default: False)
    parallel_method: str (optional)
        Define the parallelization method: ``thread``, ``process`` or a
        :class:`dask.distributed.client.Client`.
    progress: bool (optional)
        Show a progress bar or not when fetching data.
    chunks: 'auto' or dict of integers (optional)
        Dictionary with request access point as keys and number of chunks to create as values.
        Eg:

        - ``{'wmo': 10}`` will create a maximum of 10 chunks along WMOs when used with ``Fetch_wmo``.
        - ``{'lon': 2}`` will create a maximum of 2 chunks along longitude when used with ``Fetch_box``.
    chunks_maxsize: dict (optional)
        Dictionary with request access point as keys and chunk size as values (used as maximum values
        in 'auto' chunking). Eg: ``{'wmo': 5}`` will create chunks with as many as 5 WMOs each.
        Default: None, equivalent to an empty dict.
    """
    self.cache = cache
    self.cachedir = cachedir
    self.fs = filestore(cache=self.cache, cachedir=self.cachedir)
    self.errors = errors

    if not isinstance(parallel, bool):
        # The parallelization method is passed through the argument 'parallel':
        parallel_method = parallel
        if parallel in ['thread', 'process']:
            parallel = True
    # A dask client instance is a documented valid 'parallel_method', so only
    # validate string values here (the original check rejected any non-string,
    # making the dask option unusable):
    if isinstance(parallel_method, str) and parallel_method not in ["thread", "process"]:
        raise ValueError("localftp only support multi-threading and processing ('%s' unknown)" % parallel_method)
    self.parallel = parallel
    self.parallel_method = parallel_method
    self.progress = progress
    self.chunks = chunks
    # Avoid the mutable-default-argument pitfall: each instance gets its own dict
    self.chunks_maxsize = {} if chunks_maxsize is None else chunks_maxsize

    # NOTE(review): 'dimension' is documented but not stored on self here —
    # presumably consumed downstream or via init(**kwargs); confirm.
    self.definition = 'Local ftp Argo data fetcher'
    self.dataset_id = OPTIONS['dataset'] if ds == '' else ds
    self.local_ftp = OPTIONS['local_ftp'] if local_ftp == '' else local_ftp
    check_localftp(self.local_ftp, errors='raise')  # Validate local_ftp
    self.init(**kwargs)