Exemplo n.º 1
0
    def list_argo_files(self, errors: str = 'raise'):
        """ Set the internal list of files to load

        The list is built on the first call only and stored in ``self._list_of_argo_files``;
        later calls reuse the stored list.

        Parameters
        ----------
        errors: {'raise','ignore'}, optional
            If 'raise' (default), raises a NetCDF4FileNotFoundError error if any of the requested
            files cannot be found. If 'ignore', file not found is skipped when fetching data.

        Returns
        -------
        self

        Raises
        ------
        NetCDF4FileNotFoundError
            If a file listed in the index does not exist and ``errors`` is 'raise'.
        """
        if not hasattr(self, '_list_of_argo_files'):
            # Collect into a local list and publish it only once the scan is complete, so that
            # an exception raised half-way does not leave a partial list cached on the instance.
            found_files = []
            # Fetch the index to retrieve the list of profiles to load:
            filt = indexfilter_box(self.BOX)
            df_index = self.fs_index.open_dataframe(filt)
            if isinstance(df_index, pd.core.frame.DataFrame):
                # Ok, we found profiles in the index file,
                # so now we can make sure these files exist:
                for file in df_index['file']:
                    abs_file = os.path.sep.join([self.local_ftp, "dac", file])
                    if self.fs.exists(abs_file):
                        found_files.append(abs_file)
                    elif errors == 'raise':
                        raise NetCDF4FileNotFoundError(abs_file)
                    # Otherwise remain silent: skip this file and keep scanning the others
                    # todo should raise a warning instead ?
            self._list_of_argo_files = found_files
        return self
Exemplo n.º 2
0
    def uri(self):
        """ List of files to load for a request

        The list is built on the first call only and stored in ``self._list_of_argo_files``;
        later calls return the stored list.

        Files listed in the index but missing on disk are handled according to ``self.errors``:
        with 'raise' a NetCDF4FileNotFoundError is raised, with any other value the missing
        file is skipped.

        Returns
        -------
        list(str)

        Raises
        ------
        NetCDF4FileNotFoundError
            If a file listed in the index does not exist and ``self.errors`` is 'raise'.
        """
        if not hasattr(self, '_list_of_argo_files'):
            # Collect into a local list and publish it only once the scan is complete, so that
            # an exception raised half-way does not leave a partial list cached on the instance.
            found_files = []
            # Fetch the index to retrieve the list of profiles to load:
            filt = indexfilter_box(self.indexBOX)
            df_index = self.fs_index.read_csv(filt)
            if isinstance(df_index, pd.core.frame.DataFrame):
                # Ok, we found profiles in the index file,
                # so now we can make sure these files exist:
                for file in df_index['file']:
                    abs_file = os.path.sep.join([self.local_ftp, "dac", file])
                    if self.fs.exists(abs_file):
                        found_files.append(abs_file)
                    elif self.errors == 'raise':
                        raise NetCDF4FileNotFoundError(abs_file)
                    # Otherwise remain silent: skip this file and keep scanning the others
                    # todo should raise a warning instead ?
            self._list_of_argo_files = found_files
        return self._list_of_argo_files
Exemplo n.º 3
0
    def _absfilepath(self,
                     wmo: int,
                     cyc: int = None,
                     errors: str = 'raise') -> str:
        """ Set absolute netcdf file path to load for a given wmo/cyc pair

        Parameters
        ----------
        wmo: int
            WMO float code
        cyc: int, optional
            Cycle number (None by default)
        errors: {'raise','ignore'}, optional
            If 'raise' (default), raises a NetCDF4FileNotFoundError error if the requested
            file cannot be found. If 'ignore', return None silently.

        Returns
        -------
        netcdf_file_path : str
        """
        p = self._filepathpattern(wmo, cyc)
        l = sorted(glob(p))
        if len(l) == 1:
            return l[0]
        elif len(l) == 0:
            if errors == 'raise':
                raise NetCDF4FileNotFoundError(p)
            else:
                # Otherwise remain silent/ignore
                #todo should raise a warning instead ?
                return None
        else:
            warnings.warn(
                "More than one file to load for a single float cycle ! Return the 1st one by default."
            )
            # The choice of the file to load depends on the user mode and dataset requested.
            #todo define a robust choice
            if self.dataset_id == 'phy':
                # Use the synthetic profile:
                l = [
                    file for file in l if [
                        file for file in [os.path.split(w)[-1] for w in l]
                        if file[0] == 'S'
                    ][0] in file
                ]
                # print('phy', l[0])
            elif self.dataset_id == 'bgc':
                l = [
                    file for file in l if [
                        file for file in [os.path.split(w)[-1] for w in l]
                        if file[0] == 'M'
                    ][0] in file
                ]
                # print('bgc:', l)
            return l[0]
Exemplo n.º 4
0
 def foobar():
     """Raise a NetCDF4FileNotFoundError built with the message "invalid_path"."""
     missing_file_error = NetCDF4FileNotFoundError("invalid_path")
     raise missing_file_error
Exemplo n.º 5
0
    def _absfilepath(self,
                     wmo: int,
                     cyc: int = None,
                     errors: str = 'raise') -> str:
        """ Return the absolute netcdf file path to load for a given wmo/cyc pair

        Based on the dataset, the wmo and the cycle requested, return the absolute path toward the file to load.

        The file is searched using its expected file name pattern (following GDAC conventions).

        If more than one file are found to match the pattern, the first 1 (alphabeticaly) is returned.

        If no files match the pattern, the function can raise an error or fail silently and return None.

        Parameters
        ----------
        wmo: int
            WMO float code
        cyc: int, optional
            Cycle number (None by default)
        errors: {'raise', 'ignore'}, optional
            If 'raise' (default), raises a NetCDF4FileNotFoundError error if the requested
            file cannot be found. If set to 'ignore', return None silently.

        Returns
        -------
        netcdf_file_path : str
        """

        # This function will be used whatever the access point, since we are working with a GDAC like set of files
        def _filepathpattern(wmo, cyc=None):
            """ Return a file path pattern to scan for a given wmo/cyc pair

            Based on the dataset and the cycle number requested, construct the closest file path pattern to be loaded

            This path is absolute, the pattern can contain '*', and it is the file path, so it has '.nc' extension

            Returns
            -------
            file_path_pattern : str
            """
            if cyc is None:
                # Multi-profile file:
                # <FloatWmoID>_prof.nc
                if self.dataset_id == 'phy':
                    return os.path.sep.join([
                        self.local_ftp, "dac", "*",
                        str(wmo),
                        "%i_prof.nc" % wmo
                    ])
                elif self.dataset_id == 'bgc':
                    return os.path.sep.join([
                        self.local_ftp, "dac", "*",
                        str(wmo),
                        "%i_Sprof.nc" % wmo
                    ])
            else:
                # Single profile file:
                # <B/M/S><R/D><FloatWmoID>_<XXX><D>.nc
                if cyc < 1000:
                    return os.path.sep.join([
                        self.local_ftp, "dac", "*",
                        str(wmo), "profiles",
                        "*%i_%0.3d*.nc" % (wmo, cyc)
                    ])
                else:
                    return os.path.sep.join([
                        self.local_ftp, "dac", "*",
                        str(wmo), "profiles",
                        "*%i_%0.4d*.nc" % (wmo, cyc)
                    ])

        pattern = _filepathpattern(wmo, cyc)
        lst = sorted(glob(pattern))
        # lst = sorted(self.fs.glob(pattern))  # Much slower than the regular glob !
        if len(lst) == 1:
            return lst[0]
        elif len(lst) == 0:
            if errors == 'raise':
                raise NetCDF4FileNotFoundError(pattern)
            else:
                # Otherwise remain silent/ignore
                # todo: should raise a warning instead ?
                return None
        else:
            # warnings.warn("More than one file to load for a single float cycle ! Return the 1st one by default.")
            # The choice of the file to load depends on the user mode and dataset requested.
            # todo: define a robust choice
            if self.dataset_id == 'phy':
                if cyc is None:
                    # Use the synthetic profile:
                    lst = [
                        file for file in lst if [
                            file
                            for file in [os.path.split(w)[-1] for w in lst]
                            if file[0] == 'S'
                        ][0] in file
                    ]
                else:
                    # Use the ascent profile:
                    lst = [
                        file for file in lst if [
                            file
                            for file in [os.path.split(w)[-1] for w in lst]
                            if file[-1] != 'D'
                        ][0] in file
                    ]
                # print('phy', lst[0])
            elif self.dataset_id == 'bgc':
                lst = [
                    file for file in lst if [
                        file for file in [os.path.split(w)[-1] for w in lst]
                        if file[0] == 'M'
                    ][0] in file
                ]
                # print('bgc:', lst)
            return lst[0]