Example #1
def to_datestring_YYYYMMDD(value):
    """Convert input time to string with format YYYYMMDD
    
    Parameters
    ----------
    value
        input time, may be string, datetime, numpy.datetime64 or 
        pandas.Timestamp
        
    Returns
    -------
    str
        input formatted to string YYYYMMDD
        
    Raises
    ------
    ValueError
        if input is not supported
    """
    if isinstance(value, str) and len(value) == 8:
        logger.info('Input is already string containing 8 chars. Assuming it '
                    'is in the right format and returning unchanged')
        return value
    try:
        return to_pandas_timestamp(value).strftime('%Y%m%d')
    except Exception as e:
        raise ValueError('Invalid input, need str, datetime, numpy.datetime64 '
                         'or pandas.Timestamp. Error: {}'.format(repr(e)))
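
A minimal usage sketch (hypothetical: it assumes the helper to_pandas_timestamp behaves like pandas.Timestamp for the supported input types, and that a module-level logger exists):

import datetime
import logging
import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)

def to_pandas_timestamp(value):
    # stand-in for the project helper of the same name (assumption)
    return pd.Timestamp(value)

print(to_datestring_YYYYMMDD('20180115'))                      # returned unchanged
print(to_datestring_YYYYMMDD(datetime.datetime(2018, 1, 15)))  # '20180115'
print(to_datestring_YYYYMMDD(np.datetime64('2018-01-15')))     # '20180115'
print(to_datestring_YYYYMMDD(pd.Timestamp('2018-01-15')))      # '20180115'
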
Example #2
    def find_data_dir(self, name_or_pattern, ignorecase=True):
        """Find match of input name or pattern in Aerocom database

        Parameters
        ----------
        name_or_pattern : str
            name or pattern of data (can be model or obs data)
        ignorecase : bool
            if True, upper / lower case is ignored

        Returns
        -------
        str
            data directory of match

        Raises
        ------
        DataSearchError
            if no matches or no unique match can be found
        """
        if name_or_pattern in self:
            logger.info('{} found in instance of AerocomBrowser'.format(
                name_or_pattern))
            return self[name_or_pattern]
        logger.info('Searching database for {}'.format(name_or_pattern))
        return self._browse(name_or_pattern,
                            ignorecase=ignorecase,
                            return_if_match=True)  # returns data directory (str)
Example #3
File: filter.py Project: ejgal/pyaerocom
 def apply(self, data_obj):
     """Apply filter to data object
     
     Parameters
     ----------
     data_obj : :obj:`UngriddedData`, :obj:`GriddedData` or :obj:`ColocatedData`
         input data object that is supposed to be filtered
         
     Returns
     -------
     :obj:`UngriddedData`, :obj:`GriddedData` or :obj:`ColocatedData`
         filtered data object
         
     Raises
     ------
     IOError
         if input is invalid
     """
     if self.name == self.NO_FILTER_NAME:
         logger.info('NO FILTER flag: {} -> no filtering will be applied '
                     'in {}. Returning unchanged object.'.format(
                         self.NO_FILTER_NAME, type(data_obj)))
         return data_obj
     if isinstance(data_obj, UngriddedData):
         return self._apply_ungridded(data_obj)
     elif isinstance(data_obj, GriddedData):
         return self._apply_gridded(data_obj)
     elif isinstance(data_obj, ColocatedData):
         return self._apply_colocated(data_obj)
     raise IOError('Cannot filter {} obj, need instance of GriddedData, '
                   'UngriddedData or ColocatedData'.format(type(data_obj)))
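
The method is a plain isinstance dispatch. A generic standalone sketch of that pattern (the handler mapping below is made up, not part of pyaerocom):

def apply_filter(obj, handlers):
    # route the input object to the first handler whose type matches
    for cls, func in handlers.items():
        if isinstance(obj, cls):
            return func(obj)
    raise IOError('Cannot filter {} obj'.format(type(obj)))

handlers = {list: sorted, str: str.upper}
print(apply_filter([3, 1, 2], handlers))  # [1, 2, 3]
print(apply_filter('abc', handlers))      # ABC
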
Example #4
def check_and_regrid_lons_cube(cube):
    """Checks and corrects for if longitudes of :attr:`grid` are 0 -> 360
    
    Note
    ----
    This method checks if the maximum of the current longitudes array
    exceeds 180. Thus, it is not recommended to use this function after
    subsetting a cube, rather, it should be checked directly when the 
    file is loaded (cf. :func:`load_input`)
    
    Parameters
    ----------
    cube : iris.cube.Cube
        gridded data loaded as iris.Cube
        
    Returns
    -------
    iris.cube.Cube
        input cube with longitudes rolled to -180 -> 180 if they were on a
        0 -> 360 definition, else the unchanged input cube
    """
    if cube.coord("longitude").points.max() > 180:
        logger.info("Rolling longitudes to -180 -> 180 definition")
        cube = cube.intersection(longitude=(-180, 180))
    return cube
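
For a plain numpy array of longitudes, the 0 -> 360 to -180 -> 180 remapping that cube.intersection performs can be sketched as:

import numpy as np

lons = np.array([0., 90., 180., 270., 359.])
rolled = ((lons + 180.) % 360.) - 180.  # note: 180 maps to -180
print(rolled)  # [   0.   90. -180.  -90.   -1.]
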
Example #5
    def __init__(self, var_name="od550aer", init=True, cfg=None, **kwargs):
        # save original input
        self._var_name_input = var_name
        self.is_3d = False
        self.is_dry = False

        var_name = var_name.lower()

        if '3d' in var_name:
            logger.info('Variable name {} contains 3d. Activating flag is_3d '
                        'and removing from var_name string'.format(var_name))
            var_name = var_name.replace('3d', '')
            self.is_3d = True
        if 'dry' in var_name:
            self.is_dry = True
            var_name_alt = var_name.replace('dry', '')
        else:
            var_name_alt = var_name

        self.var_name = var_name
        self.var_name_alt = var_name_alt  #alternative var_name
        self.standard_name = None
        self.units = '1'
        #self.aliases = []
        self.wavelength_nm = None
        self.dry_rh_max = None
        self.dimensions = None
        self.minimum = -9e30
        self.maximum = 9e30

        self.description = None
        self.comments_and_purpose = None

        #wavelength tolerance in nm
        self.obs_wavelength_tol_nm = None

        self.scat_xlim = None
        self.scat_ylim = None
        self.scat_loglog = None
        self.scat_scale_factor = 1.0

        # settings for map plotting
        self.map_vmin = None
        self.map_vmax = None
        self.map_c_under = None
        self.map_c_over = None
        self.map_cbar_levels = None
        self.map_cbar_ticks = None
        # imports default information and, on top, variable information (if
        # applicable)
        if init:
            self.parse_from_ini(var_name,
                                var_name_alt=self.var_name_alt,
                                cfg=cfg)

        self.update(**kwargs)
        if self.obs_wavelength_tol_nm is None:
            self.obs_wavelength_tol_nm = OBS_WAVELENGTH_TOL_NM
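
A worked sketch of the name-parsing convention used above; the '3d' and 'dry' tokens embedded in the variable name act as flags (the variable name below is hypothetical):

var_name = 'ec5503Ddryaer'.lower()           # -> 'ec5503ddryaer'
is_3d = '3d' in var_name                     # True
var_name = var_name.replace('3d', '')        # 'ec550dryaer'
is_dry = 'dry' in var_name                   # True
var_name_alt = var_name.replace('dry', '')   # 'ec550aer'
print(var_name, var_name_alt, is_3d, is_dry)
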
Example #6
    def __init__(self, var_ini=None, var_csv=None):

        self.var_ini = var_ini
        self.var_csv = var_csv

        self._cfg = self._read_ini()

        self.all_vars = list(self._cfg.keys())

        logger.info("Importing variable aliases info")
        self.all_vars.extend(list(_read_alias_ini()))
Example #7
    def __init__(self, var_ini):
        self._all_vars = None
        self._var_ini = None

        self.var_ini = var_ini

        self._cfg_parser = parse_variables_ini(var_ini)
        self._alias_parser = parse_aliases_ini()
        self._idx = -1

        logger.info("Importing variable aliases info")
Example #8
 def check_dimensions(self):
     """Checks if data source and time dimension are at the right index"""
     dims = self.data.dims
     if not 2 < len(dims) < 5:
         logger.info('Invalid number of dimensions. Must be 3 or 4')
         return False
     try:
         return dims.index('data_source') == 0 and dims.index('time') == 1
     except ValueError:
         return False
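
A standalone sketch of the same ordering check on a plain dims tuple; tuple.index raises ValueError when a name is missing:

def dims_ok(dims):
    if not 2 < len(dims) < 5:  # i.e. 3 or 4 dimensions
        return False
    try:
        return dims.index('data_source') == 0 and dims.index('time') == 1
    except ValueError:
        return False

print(dims_ok(('data_source', 'time', 'station_name')))  # True
print(dims_ok(('time', 'data_source', 'station_name')))  # False
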
Example #9
    def parse_from_ini(self, var_name=None, cfg=None):
        """Import information about default region

        Parameters
        ----------
        var_name : str
            variable name
        var_name_alt : str
            alternative variable name that is used if variable name is not
            available
        cfg : ConfigParser
            open config parser object

        Returns
        -------
        bool
            True, if default could be loaded, False if not

        Raises
        ------
        IOError
            if regions.ini file does not exist
        """
        if cfg is None:
            cfg = self.read_config()

        if var_name not in cfg:
            try:
                var_name = self._check_aliases(var_name)
            except VariableDefinitionError:
                logger.info('Unknown input variable {}'.format(var_name))
                return
            self._var_name_aerocom = var_name

        var_info = cfg[var_name]
        # this variable should import settings from another variable
        if 'use' in var_info:
            use = var_info['use']
            if use not in cfg:
                raise VariableDefinitionError(
                    'Input variable {} depends on {} '
                    'which is not available in '
                    'variables.ini.'.format(var_name, use))
            self.parse_from_ini(use, cfg)

        for key, val in var_info.items():
            if key in self.ALT_NAMES:
                key = self.ALT_NAMES[key]
            self._add(key, val)
Example #10
 def to_timeseries(self, var_name, freq=None, resample_how='mean'):
     """Get pandas.Series object for one of the data columns
     
     Parameters
     ----------
     var_name : str
         name of variable (e.g. "od550aer")
     freq : str
         new temporal resolution (can be pandas freq. string, or pyaerocom
         ts_type)
     resample_how : str
         choose from mean or median (only relevant if input parameter freq 
         is provided, i.e. if resampling is applied)
         
     Returns
     -------
     Series
         time series object
     
     Raises 
     ------
     KeyError
         if variable key does not exist in this dictionary
     ValueError
         if length of data array does not equal the length of the time array
     """
     if var_name not in self:
         raise KeyError("Variable {} does not exist".format(var_name))
     self.check_dtime()
     data = self[var_name]
     if isinstance(data, pd.Series):
         logger.info('Data is already instance of pandas.Series')
         return data
     if data.ndim != 1:
         raise NotImplementedError('Multi-dimensional data columns cannot '
                                   'be converted to time-series')
     if len(data) != len(self.dtime):
         raise ValueError("Mismatch between length of data array for "
                          "variable {} (length: {}) and time array "
                          "(length: {}).".format(var_name, len(data),
                                                 len(self.dtime)))
     s = pd.Series(data, index=self.dtime)
     if freq is not None:
         from pyaerocom.helpers import resample_timeseries
         s = resample_timeseries(s, freq, resample_how)
     return s
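
A minimal sketch of the underlying pandas pattern; the project helper resample_timeseries is assumed to wrap pandas resampling roughly like this:

import numpy as np
import pandas as pd

dtime = pd.date_range('2018-01-01', periods=6, freq='D')
s = pd.Series([1., 2., np.nan, 4., 5., 6.], index=dtime)

# daily -> monthly mean, analogous to freq='monthly', resample_how='mean'
print(s.resample('MS').mean())
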
Example #11
 def write(self, data):
     """Write instance of UngriddedData to cache
     
     Parameters
     ----------
     data : UngriddedData
         object containing the data
     """
     if not self.connection_established:
         # TODO: may be updated in the future
         raise AerocomConnectionError('Cannot write Cache file, connection '
                                      'to Aerocom database could not be '
                                      'established (required for checking '
                                      'revision)')
     if not isinstance(data, UngriddedData):
         raise TypeError('Invalid input, need instance of UngriddedData, '
                         'got {}'.format(type(data)))
     logger.info('Writing cache file: {}'.format(self.file_path))
     success = True
     # OutHandle = gzip.open(c__cache_file, 'wb') # takes too much time
     out_handle = open(self.file_path, 'wb')
     try:
         pickle.dump(self.newest_file_in_read_dir, out_handle, 
                     pickle.HIGHEST_PROTOCOL)
         pickle.dump(self.newest_file_date_in_read_dir, out_handle, 
                     pickle.HIGHEST_PROTOCOL)
         pickle.dump(self.reader.data_revision, out_handle, 
                     pickle.HIGHEST_PROTOCOL)
         pickle.dump(self.reader.__version__, out_handle, 
                     pickle.HIGHEST_PROTOCOL)
         pickle.dump(UngriddedData.__version__, out_handle, 
                     pickle.HIGHEST_PROTOCOL)
         pickle.dump(self.__version__, out_handle, 
                     pickle.HIGHEST_PROTOCOL)
         pickle.dump(data, out_handle, pickle.HIGHEST_PROTOCOL)
     except Exception:
         logger.exception('Failed to write cache')
         success = False
     finally:
         out_handle.close()
         if not success:
             os.remove(self.file_path)

     if success:
         logger.info('Success!')
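
The cache layout is a sequence of pickled objects: several header entries followed by the data payload. A standalone sketch of that pattern (file name and header content are made up):

import pickle

with open('cache.pkl', 'wb') as fh:
    pickle.dump({'revision': '20180101'}, fh, pickle.HIGHEST_PROTOCOL)  # header
    pickle.dump([1, 2, 3], fh, pickle.HIGHEST_PROTOCOL)                 # payload

with open('cache.pkl', 'rb') as fh:
    head = pickle.load(fh)  # objects come back in write order
    data = pickle.load(fh)
print(head, data)
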
Example #12
 def check_and_load(self):
     if not os.path.isfile(self.file_path):
         logger.info('No cache file available for query of dataset '
                     '{}'.format(self.dataset_to_read))
         return False
     
     delete_existing = False
     in_handle = open(self.file_path, 'rb')
     # read meta information about file
     if self.connection_established:
         try:
             use_cache_file = self._check_pkl_head_vs_database(in_handle)
         except Exception as e:
             use_cache_file = False
             delete_existing = True
             logger.exception('File error in cached data file {}. File will '
                              'be removed and data reloaded. '
                              'Error: {}'.format(self.file_path, repr(e)))
         if not use_cache_file:
             # TODO: Should we delete the cache file if it is outdated ???
             logger.info('Aborting reading cache file {}. Aerocom database '
                         'has changed compared to cached version'
                         .format(self.file_name))
             in_handle.close()
             if delete_existing: #something was wrong
                 os.remove(self.file_path)
             return False
     else:
         for k in range(self.LEN_CACHE_HEAD):
             logger.debug(pickle.load(in_handle))
     # everything is okay
     data = pickle.load(in_handle)
     if not isinstance(data, UngriddedData):
         raise TypeError('Unexpected data type stored in cache file, need '
                         'instance of UngriddedData, got {}'.format(type(data)))
     self.loaded_data = data
     logger.info('Successfully loaded data for {} from Cache'.format(self.dataset_to_read))
     return True
Example #13
def plotscatter(model_name,
                model_data=None,
                obs_data=None,
                opts=None,
                verbose=True):
    """Method to plot scatterplots

    Todo
    ----

    Complete docstring, review code

    """
    if verbose:
        change_verbosity(new_level='debug')

    plt_name = 'SCATTERLOG'
    var_to_run = opts['VariablesToRun'][0]

    # global settings (including plot settings) for variable
    VAR_PARAM = const.VAR_PARAM[var_to_run]

    obs_network_name = opts['ObsNetworkName'][0]
    obs_data_as_series = obs_data.to_timeseries(start_date=opts['StartDate'],
                                                end_date=opts['EndDate'],
                                                freq='D')
    obs_lats = [
        obs_data_as_series[i]['latitude']
        for i in range(len(obs_data_as_series))
    ]
    obs_lons = [
        obs_data_as_series[i]['longitude']
        for i in range(len(obs_data_as_series))
    ]
    obs_names = [
        obs_data_as_series[i]['station_name']
        for i in range(len(obs_data_as_series))
    ]
    # model_station_data = model_data.interpolate([("latitude", obs_lats), ("longitude", obs_lons)])
    # times_as_dt64 = pa.helpers.cftime_to_datetime64(model_station_data.time)
    # model_data_as_series = pa.helpers.to_time_series_griesie(model_station_data.grid.data, obs_lats, obs_lons,
    #                                                          times_as_dt64, var_name = [var_to_run])

    model_data_as_series = model_data.to_time_series([("latitude", obs_lats),
                                                      ("longitude", obs_lons)])

    df_time = pd.DataFrame()
    df_points = pd.DataFrame()
    station_no = 0
    for i in range(len(obs_data_as_series)):
        _len = len(obs_data_as_series[i][var_to_run])
        # print('{} length: {}'.format(obs_names[i],_len))
        if _len > 0:
            _nansum = np.nansum(obs_data_as_series[i][var_to_run])
            # _isnan = np.isnan(_nansum)
            # print('{} nansum: {:.3f}'.format(obs_names[i],np.nansum(obs_data_as_series[i][var_to_run])))
            # print('{} isnan: {}'.format(obs_names[i],_isnan))
            if _nansum > 0.:
                station_no += 1
                # print('{} station_no: {}'.format(obs_names[i],station_no))
            else:
                print('{} removed due to NaNs only'.format(obs_names[i]))
        else:
            continue
        # put obs and model in DataFrame to make them use the same time index
        df_time_temp = pd.DataFrame(obs_data_as_series[i][var_to_run],
                                    columns=[obs_network_name])
        df_points = pd.concat([df_points, df_time_temp])
        # df_time_temp[model_name] = model_data_as_series[i][var_to_run]*1.E3
        df_time_temp[model_name] = (model_data_as_series[i][var_to_run] *
                                    VAR_PARAM['scat_scale_factor'])
        # df_time has now all time steps where either one of the obs or model data have data
        #
        # df_points = df_points.append(pd.DataFrame(np.float_(df_time_temp.values), columns=df_time_temp.columns))
        df_time = pd.concat([df_time, df_time_temp])

    # remove all indices where either one of the data pairs is NaN
    # mainly done to get the number of days right.
    # df_time.corr() gets it right without
    df_time = df_time.dropna(axis=0, how='any')
    df_points = df_points.dropna()
    print('# of measurements: {}'.format(len(df_points)))

    filter_name = 'WORLD'
    time_step_name = 'mALLYEARdaily'
    # OD550_AER_an2008_YEARLY_WORLD_SCATTERLOG_AeronetSunV3Lev2.0.daily.ps.png
    # if df_time[model_name].index[0].year != df_time[model_name].index[-1].year:
    years_covered = df_time[model_name].index[:].year.unique().sort_values()
    if len(years_covered) > 1:
        figname = '{}_{}_an{}-{}_{}_{}_{}_{}.png'.format(
            model_name, var_to_run, years_covered[0], years_covered[-1],
            time_step_name, filter_name, plt_name, obs_network_name)
        plotname = "{}-{} {}".format(years_covered[0], years_covered[-1],
                                     'daily')
    else:
        figname = '{}_{}_an{}_{}_{}_{}_{}.png'.format(model_name, var_to_run,
                                                      years_covered[0],
                                                      time_step_name,
                                                      filter_name, plt_name,
                                                      obs_network_name)
        plotname = "{} {}".format(years_covered[0], 'daily')

    logger.info(figname)

    mean = df_time.mean()
    correlation_coeff = df_time.corr()
    # IDL: rms=sqrt(total((f_YData-f_Xdata)^2)/n_elements(f_YData))
    #sum = df_time.sum()
    # nmb=total(f_YData-f_Xdata)/total(f_Xdata)*100.
    # c=n_elements(f_YData)
    # f_temp=(f_YData-f_Xdata)/(f_YData+f_Xdata)
    # mnmb=2./c*total(f_temp)*100.
    # fge=2./c*total(abs(f_temp))*100.
    # f_YDatabc=f_YData*(total(f_Xdata,/nan)/total(f_YData,/nan)) ; bias corrected model data
    # rmsbc=sqrt(total((f_YDatabc-f_Xdata)^2)/n_elements(f_YDatabc))
    difference = df_time[model_name] - df_time[obs_network_name]
    num_points = len(df_time)
    rms = np.sqrt(np.nansum(np.power(difference.values, 2)) / num_points)
    nmb = np.sum(difference) / np.sum(df_time[obs_network_name]) * 100.
    tmp = (df_time[model_name] - df_time[obs_network_name]) / (
        df_time[model_name] + df_time[obs_network_name])
    mnmb = 2. / num_points * np.sum(tmp) * 100.
    fge = 2. / num_points * np.sum(np.abs(tmp)) * 100.

    df_time.plot.scatter(obs_network_name,
                         model_name,
                         loglog=VAR_PARAM['scat_loglog'],
                         marker='+',
                         color='black')
    # plot the 1 by 1 line
    plt.plot(VAR_PARAM['scat_xlim'], VAR_PARAM['scat_ylim'], '-', color='grey')
    ax = plt.gca()
    ax.set_aspect('equal')

    plt.xlim(VAR_PARAM['scat_xlim'])
    plt.ylim(VAR_PARAM['scat_ylim'])
    xypos_index = 0
    var_str = var_to_run + VAR_PARAM.unit_str
    ax.annotate("{} #: {} # st: {}".format(var_str, len(df_time), station_no),
                xy=xypos[xypos_index],
                xycoords='axes fraction',
                fontsize=14,
                color='red')
    xypos_index += 1
    ax.annotate('Obs: {:.3f}'.format(mean[obs_network_name]),
                xy=xypos[xypos_index],
                xycoords='axes fraction',
                fontsize=10,
                color='red')
    xypos_index += 1
    ax.annotate('Mod: {:.3f}'.format(mean[model_name]),
                xy=xypos[xypos_index],
                xycoords='axes fraction',
                fontsize=10,
                color='red')
    xypos_index += 1
    ax.annotate('NMB: {:.1f}%'.format(nmb),
                xy=xypos[xypos_index],
                xycoords='axes fraction',
                fontsize=10,
                color='red')
    xypos_index += 1
    ax.annotate('MNMB: {:.1f}%'.format(mnmb),
                xy=xypos[xypos_index],
                xycoords='axes fraction',
                fontsize=10,
                color='red')
    xypos_index += 1
    ax.annotate('R: {:.3f}'.format(correlation_coeff.values[0, 1]),
                xy=xypos[xypos_index],
                xycoords='axes fraction',
                fontsize=10,
                color='red')
    xypos_index += 1
    ax.annotate('RMS: {:.3f}'.format(rms),
                xy=xypos[xypos_index],
                xycoords='axes fraction',
                fontsize=10,
                color='red')
    xypos_index += 1
    ax.annotate('FGE: {:.3f}'.format(fge),
                xy=xypos[xypos_index],
                xycoords='axes fraction',
                fontsize=10,
                color='red')
    # right lower part
    ax.annotate('{}'.format(plotname),
                xy=xypos[-2],
                xycoords='axes fraction',
                ha='center',
                fontsize=10,
                color='black')
    ax.annotate('{}'.format(filter_name),
                xy=xypos[-1],
                xycoords='axes fraction',
                ha='center',
                fontsize=10,
                color='black')

    plt.savefig(figname, dpi=300)
    plt.close()
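
The statistics computed above follow the formulas quoted in the IDL comments. A standalone sketch with toy obs / model arrays:

import numpy as np

obs = np.array([1.0, 2.0, 3.0, 4.0])
mod = np.array([1.2, 1.8, 3.3, 4.4])

diff = mod - obs
n = len(obs)
rms = np.sqrt(np.sum(diff**2) / n)          # root mean square error
nmb = np.sum(diff) / np.sum(obs) * 100.     # normalised mean bias [%]
tmp = diff / (mod + obs)
mnmb = 2. / n * np.sum(tmp) * 100.          # modified normalised mean bias [%]
fge = 2. / n * np.sum(np.abs(tmp)) * 100.   # fractional gross error [%]
print(rms, nmb, mnmb, fge)
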
Example #14
    def _browse(self, name_or_pattern, ignorecase=True, return_if_match=True):
        """Search all Aerocom data directories that match input name or pattern

        Note
        ----
        Please do not use this function directly but use
        :func:`find_data_dir` instead.

        Parameters
        ----------
        name_or_pattern : str
            name or pattern of data (can be model or obs data)
        ignorecase : bool
            if True, upper / lower case is ignored
        return_if_match : bool
            if True, then the data directory is returned as string, if it can
            be found, else, only a list is returned that contains all
            matches. The latter takes longer since the whole database is
            searched.

        Returns
        -------
        :obj:`str` or :obj:`list`
            Data directory (str, if ``return_if_match`` is True) or list
            containing valid Aerocom names (which can then be used to
            retrieve the paths)

        Raises
        ------
        DataSearchError
            if no match or no unique match can be found
        """
        pattern = fnmatch.translate(name_or_pattern)
        _candidates = []
        _msgs = []
        _warnings = []

        for obs_id, obs_path in const.OBSLOCS_UNGRIDDED.items():
            if ignorecase:
                match = name_or_pattern.lower() == obs_id.lower()
            else:
                match = name_or_pattern == obs_id
            if match:
                logger.info("Found match for search pattern in obs network "
                            "directories {}".format(obs_id))
                path = os.path.normpath(obs_path)
                if os.path.exists(path):
                    self[obs_id] = path
                    if return_if_match:
                        return path
            else:
                if ignorecase:
                    match = bool(re.search(pattern, obs_id, re.IGNORECASE))
                else:
                    match = bool(re.search(pattern, obs_id))
                if match:
                    path = os.path.normpath(obs_path)
                    if os.path.exists(path):
                        self[obs_id] = path
                        _candidates.append(obs_id)
                        if return_if_match:
                            return path

        for search_dir in const.DATA_SEARCH_DIRS:
            # get the directories
            if os.path.isdir(search_dir):
                #subdirs = listdir(search_dir)
                subdirs = [
                    x for x in os.listdir(search_dir)
                    if os.path.isdir(os.path.join(search_dir, x))
                ]
                for subdir in subdirs:
                    if ignorecase:
                        match = bool(re.search(pattern, subdir, re.IGNORECASE))
                    else:
                        match = bool(re.search(pattern, subdir))
                    if match:
                        _dir = os.path.normpath(
                            os.path.join(search_dir, subdir))
                        _rnsubdir = os.path.join(_dir, "renamed")
                        if os.path.isdir(_rnsubdir):
                            logger.info(
                                "{} has subdir renamed. Using that one".format(
                                    _dir))
                            _dir = _rnsubdir
                        if any([_dir in x for x in self.values()]):
                            # directory was already found before
                            continue
                        # append name of candidate ...
                        _candidates.append(subdir)
                        # ... and the corresponding data directory
                        self[subdir] = _dir

                        # now check if it is actually an exact match, if
                        # applicable
                        if return_if_match:
                            if ignorecase:
                                match = name_or_pattern.lower() == subdir.lower()
                            else:
                                match = name_or_pattern == subdir
                            if match:
                                logger.info("Found match for ID {}".format(
                                    name_or_pattern))
                                return _dir

            else:
                _msgs.append('directory %s does not exist\n' % search_dir)
        for msg in _msgs:
            logger.info(msg)

        for warning in _warnings:
            logger.warning(warning)

        if len(_candidates) == 0:
            raise DataSearchError(
                'No matches could be found for search pattern '
                '{}'.format(name_or_pattern))
        if return_if_match:
            if len(_candidates) == 1:
                logger.info("Found exactly one match for search pattern "
                            "{}: {}".format(name_or_pattern, _candidates[0]))
                return self[_candidates[0]]
            raise DataSearchError(
                'Found multiple matches for search pattern {}. '
                'Please choose from {}'.format(name_or_pattern, _candidates))
        return _candidates
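
The pattern matching above relies on fnmatch.translate, which turns a shell-style glob into a regular expression. A short sketch (the data IDs below are made up):

import fnmatch
import re

pattern = fnmatch.translate('ECMWF*')
for name in ('ECMWF_CAMS_REAN', 'ecmwf_osuite', 'AeronetSunV3'):
    print(name, bool(re.search(pattern, name, re.IGNORECASE)))
# -> True, True, False
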
Example #15
    def _browse(self, name_or_pattern, ignorecase=True, return_if_match=True):
        """Search all Aerocom data directories that match input name or pattern
        
        Note
        ----
        Please do not use this function directly but use
        :func:`find_data_dir` instead.

        Parameters
        ----------
        name_or_pattern : str
            name or pattern of data (can be model or obs data)
        ignorecase : bool
            if True, upper / lower case is ignored
        return_if_match : bool
            if True, then the data directory is returned as string, if it can
            be found, else, only a list is returned that contains all 
            matches. The latter takes longer since the whole database is 
            searched.
            
        Returns
        -------
        :obj:`str` or :obj:`list`
            Data directory (str, if ``return_if_match`` is True) or list 
            containing valid Aerocom names (which can then be used to 
            retrieve the paths)
            
        Raises
        ------
        DataSearchError
            if no match or no unique match can be found
        """
        pattern = fnmatch.translate(name_or_pattern)
        _candidates = []
        _msgs = []
        _warnings = []

        for obs_id in const.OBS_IDS:
            if ignorecase:
                match = name_or_pattern.lower() == obs_id.lower()
            else:
                match = name_or_pattern == obs_id
            if match:
                logger.info("Found match for search pattern in obs network "
                            "directories {}".format(obs_id))
                self[obs_id] = const.OBSCONFIG[obs_id]["PATH"]
                if return_if_match:
                    return self[obs_id]
            else:
                if ignorecase:
                    match = bool(re.search(pattern, obs_id, re.IGNORECASE))
                else:
                    match = bool(re.search(pattern, obs_id))
            if match:
                self[obs_id] = const.OBSCONFIG[obs_id]["PATH"]
                _candidates.append(obs_id)

        for search_dir in const.MODELDIRS:
            # get the directories
            if isdir(search_dir):
                #subdirs = listdir(search_dir)
                subdirs = [
                    x for x in listdir(search_dir)
                    if isdir(join(search_dir, x))
                ]
                for subdir in subdirs:
                    if ignorecase:
                        match = bool(re.search(pattern, subdir, re.IGNORECASE))
                    else:
                        match = bool(re.search(pattern, subdir))
                    if match:
                        _dir = join(search_dir, subdir)
                        _rnsubdir = join(_dir, "renamed")
                        if isdir(_rnsubdir):
                            logger.info(
                                "{} has subdir renamed. Using that one".format(
                                    _dir))
                            _dir = _rnsubdir


                        # append name of candidate ...
                        _candidates.append(subdir)
                        # ... and the corresponding data directory
                        self[subdir] = _dir

                        # now check if it is actually an exact match, if
                        # applicable
                        if return_if_match:
                            if ignorecase:
                                match = name_or_pattern.lower() == subdir.lower()
                            else:
                                match = name_or_pattern == subdir
                            if match:
                                logger.info("Found match for ID {}".format(
                                    name_or_pattern))
                                return _dir

            else:
                _msgs.append('directory %s does not exist\n' % search_dir)
        for msg in _msgs:
            logger.info(msg)

        for warning in _warnings:
            logger.warning(warning)

        if len(_candidates) == 0:
            raise DataSearchError(
                'No matches could be found for search pattern '
                '{}'.format(name_or_pattern))
        if return_if_match:
            if len(_candidates) == 1:
                logger.info("Found exactly one match for search pattern "
                            "{}: {}".format(name_or_pattern, _candidates[0]))
                return self[_candidates[0]]
            raise DataSearchError(
                'Found multiple matches for search pattern {}. '
                'Please choose from {}'.format(name_or_pattern, _candidates))
        return _candidates
Example #16
 def check_and_load(self, var_name):
     """Check if cache file exists and load
     
     Note
     ----
     If a cache file exists for this database, but cannot be loaded or is
     outdated against pyaerocom updates, then it will be removed (the latter
     only if :attr:`pyaerocom.const.RM_CACHE_OUTDATED` is True).
     
     Returns
     -------
     bool
         True, if cache file exists and could be successfully loaded, else
         False. Note: if import is successful, the corresponding data object
          (instance of :class:`pyaerocom.UngriddedData`) can be accessed via
          :attr:`loaded_data`
         
     Raises
     ------
     TypeError
         if cached file is not an instance of :class:`pyaerocom.UngriddedData` 
         class (which should not happen)
     """
     try:
         fp = self.file_path(var_name)
     except FileNotFoundError as e:
         logger.warning(repr(e))
         return False
     
     if not os.path.isfile(fp):
         logger.info('No cache file available for {}, {}'
                     .format(self.dataset_to_read, var_name))
         return False

     delete_existing = const.RM_CACHE_OUTDATED

     in_handle = open(fp, 'rb')

     try:
         ok = self._check_pkl_head_vs_database(in_handle)
     except Exception as e:
         ok = False
         delete_existing = True
         logger.exception('File error in cached data file {}. File will '
                          'be removed and data reloaded. '
                          'Error: {}'.format(fp, repr(e)))
     if not ok:
         # TODO: Should we delete the cache file if it is outdated ???
         logger.info('Aborting reading cache file {}. Aerocom database '
                     'or pyaerocom version has changed compared to '
                     'cached version'
                     .format(self.file_name(var_name)))
         in_handle.close()
         if delete_existing: #something was wrong
             const.print_log.info('Deleting outdated cache file: {}'
                                  .format(fp))
             os.remove(fp)
         return False
     
     # everything is okay
     data = pickle.load(in_handle)
     if not isinstance(data, UngriddedData):
         raise TypeError('Unexpected data type stored in cache file, need '
                         'instance of UngriddedData, got {}'
                         .format(type(data)))
         
     self.loaded_data[var_name] = data
     logger.info('Successfully loaded data for {} from Cache'
                 .format(self.dataset_to_read))
     return True
Example #17
    def parse_from_ini(self, var_name=None, var_name_alt=None, cfg=None):
        """Import information about default region
        
        Parameters
        ----------
        var_name : str
            variable name
        var_name_alt : str
            alternative variable name that is used if variable name is not
            available
        cfg : ConfigParser
            open config parser object
            
        Returns
        -------
        bool
            True, if default could be loaded, False if not
        
        Raises
        ------
        IOError
            if regions.ini file does not exist
        """
        if cfg is None:
            cfg = self.read_config()
        var_info = {}
        if var_name is not None and var_name != 'DEFAULT':
            if var_name in cfg:
                logger.info("Found default configuration for variable "
                            "{}".format(var_name))
                var_info = cfg[var_name]
                #self.var_name = var_name
            elif isinstance(var_name_alt, str) and var_name_alt in cfg:
                var_info = cfg[var_name_alt]
            else:
                ap = parse_aliases_ini()
                aliases = _read_alias_ini(ap)
                if var_name in aliases:
                    var_name = aliases[var_name]
                    var_info = cfg[var_name]
                else:
                    try:
                        var_name = _check_alias_family(var_name, ap)
                        var_info = cfg[var_name]
                    except VariableDefinitionError:
                        logger.warning(
                            "No default configuration available for "
                            "variable {}. Using DEFAULT settings".format(
                                var_name))

        default = cfg['DEFAULT']

        for key in self.keys():
            if key in self.ALT_NAMES:
                if self.ALT_NAMES[key] in var_info:
                    self._add(key, var_info[self.ALT_NAMES[key]])
            elif key in var_info:
                self._add(key, var_info[key])
            elif key in default:
                self._add(key, default[key])

        self.var_name = var_name
Example #18
 def write(self, data, var_name=None):
     """Write single-variable instance of UngriddedData to cache
     
     Parameters
     ----------
     data : UngriddedData
         object containing the data (possibly containing multiple variables)
     var_name : str, optional
         name of variable that is supposed to be stored (only required if
         input `data` contains more than one variable)
     """
     meta = self.cache_meta_info()
     
     if not isinstance(data, UngriddedData):
         raise TypeError('Invalid input, need instance of UngriddedData, '
                         'got {}'.format(type(data)))
     if len(data.contains_datasets) > 1:
         raise CacheWriteError('Input UngriddedData object contains '
                               'datasets: {}. Can only write single '
                               'dataset objects'
                               .format(data.contains_datasets))
     if var_name is None:
         if len(data.contains_vars) > 1:
             raise CacheWriteError('Input UngriddedData object for {} contains '
                                   'more than one variable: {}. Please '
                                   'specify which variable should be '
                                   'cached'
                                   .format(self.reader.data_id,
                                           data.contains_vars))
         var_name = data.contains_vars[0]
     
     elif not var_name in data.contains_vars:
         raise CacheWriteError('Cannot write cache file: variable {} does '
                               'not exist in input UngriddedData object'
                               .format(var_name))
         
     if len(data.contains_vars) > 1:
         data = data.extract_var(var_name)
         
     fp = self.file_path(var_name)
     logger.info('Writing cache file: {}'.format(fp))
     success = True
     # OutHandle = gzip.open(c__cache_file, 'wb') # takes too much time
     out_handle = open(fp, 'wb')
     
     try:
         # write cache header
         pickle.dump(meta, out_handle, pickle.HIGHEST_PROTOCOL)
         # write data
         pickle.dump(data, out_handle, pickle.HIGHEST_PROTOCOL)
 
     except Exception:
         from pyaerocom import print_log
         print_log.exception('Failed to write cache')
         success = False
     finally:
         out_handle.close()
         if not success:
             os.remove(fp)

     if success:
         logger.info('Successfully wrote {} data ({}) to disk!'
                     .format(var_name, self.reader.data_id))
Example #19
    def read_file(self,
                  nasa_ames_file,
                  only_head=False,
                  replace_invalid_nan=True,
                  convert_timestamps=True,
                  decode_flags=True,
                  quality_check=True):
        """Read NASA Ames file
        
        Parameters
        ----------
        nasa_ames_file : str
            EBAS NASA Ames file
        only_head : bool
            read only file header
        replace_invalid_nan : bool
            replace all invalid values in the table by NaNs. The invalid values for
            each dependent data column are identified based on the information in 
            the file header.
        convert_timestamps : bool
            compute array of numpy datetime64 timestamps from numeric timestamps
            in data
        decode_flags : bool
            if True, all flags in all flag columns are decoded from floating 
            point representation to 3 integers, e.g. 
            0.111222333 -> 111 222 333
        quality_check : bool
            perform quality check after import (for details see 
            :func:`_quality_check`)
        """
        logger.info("Reading NASA Ames file:\n{}".format(nasa_ames_file))
        lc = 0  #line counter
        dc = 0  #data block line counter
        mc = 0  #meta block counter
        END_VAR_DEF = np.nan  #will be set (info stored in header)
        IN_DATA = False
        data = []
        _insert_invalid = None
        for line in open(nasa_ames_file):
            #print(lc, _NUM_FIXLINES, line)
            if IN_DATA:
                if dc == 0:
                    logger.debug(line)
                try:
                    data.append(
                        tuple([float(x.strip())
                               for x in line.strip().split()]))
                    #data.append([float(x.strip()) for x in line.strip().split()])
                except Exception as e:
                    data.append(_insert_invalid)
                    logger.warning("Failed to read data row {}. "
                                   "Error msg: {}".format(dc, repr(e)))
                dc += 1
            elif lc < self._NUM_FIXLINES:
                try:
                    val = self._H_FIXLINES_CONV[lc](line)
                    attr = self._H_FIXLINES_YIELD[lc]
                    if isinstance(attr, list):
                        for i, attr_id in enumerate(attr):
                            self[attr_id] = val[i]
                    else:
                        self[attr] = val
                except Exception as e:
                    msg = ("Failed to read header row {}.\n{}\n"
                           "Error msg: {}".format(lc, line, repr(e)))
                    if lc in self._HEAD_ROWS_MANDATORY:
                        raise NasaAmesReadError("Fatal: {}".format(msg))
                    else:
                        logger.warning(msg)
            else:
                _flagmap_idx = 0
                if mc == 0:
                    END_VAR_DEF = self._NUM_FIXLINES + self.num_cols_dependent - 1
                    NUM_HEAD_LINES = self.num_head_lines
                    try:
                        self.var_defs.append(self._read_vardef_line(line))
                    except Exception as e:
                        logger.warning(repr(e))

                elif lc < END_VAR_DEF:
                    var = self._read_vardef_line(line)
                    #if variable corresponds to flag column, assign this
                    #flag column to all previously read variables
                    if var.is_flag:
                        for _var in self.var_defs[_flagmap_idx:]:
                            _var.flag_id = var.name
                    self.var_defs.append(var)
                    _flagmap_idx = len(self.var_defs)

                elif lc == NUM_HEAD_LINES - 1:
                    IN_DATA = True
                    self._data_header = h = [x.strip() for x in line.split()]
                    #append information of first two columns to variable
                    #definition array.
                    self._var_defs.insert(
                        0,
                        EbasColDef(name=h[0],
                                   is_flag=False,
                                   is_var=False,
                                   unit=self.time_unit))
                    self._var_defs.insert(
                        1,
                        EbasColDef(name=h[1],
                                   is_flag=False,
                                   is_var=False,
                                   unit=self.time_unit))
                    if only_head:
                        return
                    logger.debug("REACHED DATA BLOCK")
                    _insert_invalid = tuple([np.nan] * self.col_num)

                #elif lc > self._NUM_FIXLINES + 3:
                elif lc >= END_VAR_DEF + 2:
                    try:
                        name, val = line.split(":")
                        key = name.strip().lower().replace(" ", "_")
                        self.meta[key] = val.strip()
                    except Exception as e:
                        logger.warning("Failed to read line no. {}.\n{}\n"
                                       "Error msg: {}\n".format(
                                           lc, line, repr(e)))
                else:
                    logger.debug("Ignoring line no. {}: {}".format(lc, line))
                mc += 1
            lc += 1

        data = np.asarray(data)

        data[:, 1:] = data[:, 1:] * np.asarray(self.mul_factors)

        self._data = data
        if replace_invalid_nan:
            dep_dat = data[:, 1:]
            for i, val in enumerate(np.floor(self.vals_invalid)):
                try:
                    col = dep_dat[:, i]
                    cond = np.floor(col) == val
                    col[cond] = np.nan
                    dep_dat[:, i] = col
                except Exception:
                    logger.warning("Failed to replace invalid values with "
                                   "NaNs in column {}".format(
                                       self.col_names[i + 1]))
            data[:, 1:] = dep_dat
        self._data = data

        if convert_timestamps:
            try:
                self.compute_time_stamps()
            except Exception as e:
                logger.warning("Failed to compute time stamps.\n"
                               "Error message: {}".format(repr(e)))
        self.init_flags(decode_flags)
        if quality_check:
            self._quality_check()
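
A worked sketch of the flag decoding described in the docstring (one possible decoding, not necessarily the project's implementation): the float 0.111222333 encodes the three integer flags 111, 222 and 333.

def decode_flag(value):
    digits = '{:.9f}'.format(value).split('.')[1]  # '111222333'
    return int(digits[0:3]), int(digits[3:6]), int(digits[6:9])

print(decode_flag(0.111222333))  # (111, 222, 333)
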
Example #20
    def parse_from_ini(self, var_name=None, cfg=None):
        """Import information about default region
        
        Parameters
        ----------
        var_name : str
            strind ID of region (must be specified in `regions.ini <https://
            github.com/metno/pyaerocom/blob/master/pyaerocom/data/regions.ini>`__ 
            file)
        cfg : ConfigParser
            open and read config parser object
            
        Returns
        -------
        bool
            True, if default could be loaded, False if not
        
        Raises
        ------
        IOError
            if regions.ini file does not exist

        
        """
        if cfg is None:
            cfg = self.read_config()

        var_info = {}
        if var_name is not None and var_name != 'DEFAULT':
            if var_name in cfg:
                logger.info("Found default configuration for variable "
                            "{}".format(var_name))
                var_info = cfg[var_name]
                self.var_name = var_name
            else:
                aliases = _read_alias_ini()
                if var_name in aliases:
                    var_info = cfg[aliases[var_name]]
                else:
                    logger.warning(
                        "No default configuration available for "
                        "variable {}. Using DEFAULT settings".format(var_name))

        default = cfg['DEFAULT']

        for key in self.keys():
            ok = True
            if key in var_info:
                val = var_info[key]
            elif key in default:
                val = default[key]
            else:
                ok = False
            if ok:
                if key in self._TYPE_CONV:
                    try:
                        val = self._TYPE_CONV[key](val)
                    except Exception:
                        pass
                elif key == 'unit':
                    if val == 'None' or val == '1':
                        val = 1
                if val == 'None':
                    val = None
                self[key] = val
Example #21
    def check_set_countries(self, inplace=True, assign_to_dim=None):
        """
        Checks if country information is available and assigns if not

        If no country information is available, countries will be assigned
        for each lat / lon coordinate using
        :func:`pyaerocom.geodesy.get_country_info_coords`.

        Parameters
        ----------
        inplace : bool, optional
            If True, modify and return this object, else a copy.
            The default is True.
        assign_to_dim : str, optional
            name of dimension to which the country coordinate is assigned.
            Default is None, in which case station_name is used.

        Raises
        ------
        DataDimensionError
            If data is 4D (i.e. if latitude and longitude are orthogonal
            dimensions)

        Returns
        -------
        ColocatedData
            data object with countries assigned

        """
        if self.has_latlon_dims:
            raise DataDimensionError('Countries cannot be assigned to 4D '
                                     'ColocatedData with orthogonal lat / lon '
                                     'dimensions. Please consider stacking '
                                     'the latitude and longitude dimensions.')
        if assign_to_dim is None:
            assign_to_dim = 'station_name'

        if not assign_to_dim in self.dims:
            raise DataDimensionError('No such dimension', assign_to_dim)
        coldata = self if inplace else self.copy()

        if 'country' in coldata.data.coords:
            logger.info('Country information is available')
            return coldata
        coords = coldata._get_stat_coords()

        info = get_country_info_coords(coords)

        countries, codes = [], []
        for item in info:
            countries.append(item['country'])
            codes.append(item['country_code'])

        arr = coldata.data
        arr = arr.assign_coords(country = (assign_to_dim, countries),
                                country_code=(assign_to_dim, codes))
        coldata.data = arr
        return coldata
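
A minimal sketch of the xarray pattern used above: per-station country metadata is attached as extra coordinates on an existing dimension (station names and countries are made up):

import numpy as np
import xarray as xr

arr = xr.DataArray(np.zeros((2, 3)),
                   dims=('time', 'station_name'),
                   coords={'station_name': ['st1', 'st2', 'st3']})
arr = arr.assign_coords(country=('station_name', ['NO', 'DE', 'FR']),
                        country_code=('station_name', ['NOR', 'DEU', 'FRA']))
print(arr.coords['country'].values)  # ['NO' 'DE' 'FR']
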