Example #1
 def units(self):
     """Unit of data"""
     try:
         return self.data.attrs['var_units']
     except KeyError:
         logger.warning('Failed to access unit in ColocatedData class (may be '
                        'an old version of data)')
Example #2
 def data(self, val):
     if not isinstance(val, xarray.DataArray):
         raise IOError('Invalid input for data attribute, need instance '
                          'of xarray.DataArray')
     if self._data is not None:
         logger.warning('Overwriting existing data in ColocatedData object')
     self._data = val
Example #3
def get_cmap_maps_aerocom(color_theme=None, vmin=None, vmax=None):
    """Get colormap using pyAeroCom color scheme
    
    Parameters
    ----------
    color_theme : :obj:`ColorTheme`, optional
        instance of pyaerocom color theme. If None, the default scheme is used
    vmin : :obj:`float`, optional
        lower end of value range
    vmax : :obj:`float`, optional
        upper end of value range
    
    Returns
    -------
    colormap
    """
    if color_theme is None:
        color_theme = COLOR_THEME
    if vmin is not None and vmax is not None and vmin < 0 and vmax > 0:
        cmap = get_cmap(color_theme.cmap_map_div)
        if color_theme.cmap_map_div_shifted:
            try:
                from geonum.helpers import shifted_color_map
                cmap = shifted_color_map(vmin, vmax, cmap)
            except Exception:
                logger.warning(
                    'cannot shift colormap, need geonum installation')
        return cmap
    return get_cmap(color_theme.cmap_map)
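
The branching above (a diverging colormap only when the value range spans zero) can be illustrated without pyaerocom. A minimal sketch using plain matplotlib, with illustrative colormap names standing in for the configured color theme:

from matplotlib.pyplot import get_cmap

def pick_cmap(vmin, vmax, cmap_div='RdBu_r', cmap_seq='viridis'):
    """Diverging colormap if the value range crosses zero, else sequential."""
    if vmin is not None and vmax is not None and vmin < 0 < vmax:
        return get_cmap(cmap_div)
    return get_cmap(cmap_seq)

print(pick_cmap(-1, 2).name)   # RdBu_r
print(pick_cmap(0.1, 2).name)  # viridis
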
Example #4
    def _read_vardef_line(self, line_from_file):
        """Import variable definition line from NASA Ames file"""
        spl = [x.strip() for x in line_from_file.split(",")]
        name = spl[0]
        if len(spl) > 1:
            unit = spl[1]
        else:
            unit = ''
        data = EbasColDef(name=name, is_flag=True, is_var=False, unit=unit)

        if not "numflag" in name:
            data.is_var = True
            data.is_flag = False
            for item in spl[2:]:
                if "=" in item:
                    # e.g. wavelength=550nm
                    sub = item.split("=")
                    if len(sub) == 2:
                        idf, val = [x.strip() for x in sub]
                        data[idf.lower().replace(' ', '_')] = val
                    else:
                        logger.warning("Could not interpret part of column "
                                       "definition in EBAS NASA Ames file: "
                                       "{}".format(item))
                else:  # unit
                    logger.warning("Failed to interpret column definition "
                                   "part: {}".format(item))

        return data
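
For illustration, a standalone sketch of how such a variable definition line can be parsed; the sample line is made up and a plain dict stands in for the EbasColDef object returned above:

def parse_vardef(line):
    spl = [x.strip() for x in line.split(",")]
    info = {"name": spl[0], "unit": spl[1] if len(spl) > 1 else ""}
    for item in spl[2:]:
        sub = [x.strip() for x in item.split("=")]
        if len(sub) == 2:
            # e.g. "Wavelength=550 nm" -> info['wavelength'] = '550 nm'
            info[sub[0].lower().replace(" ", "_")] = sub[1]
    return info

print(parse_vardef("aerosol_light_scattering_coefficient, 1/Mm, Wavelength=550 nm"))
# {'name': 'aerosol_light_scattering_coefficient', 'unit': '1/Mm', 'wavelength': '550 nm'}
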
Example #5
 def _apply_gridded(self, data_obj):
     """Apply filter to instance of class :class:`GriddedData`
     """
     logger.warning(
         'Applying regional cropping in gridded data using Filter '
         'class. Note that this does not yet include potential '
         'cropping in the altitude dimension. Coming soon...')
     return data_obj.crop(region=self._region)
Example #6
def concatenate_iris_cubes(cubes, error_on_mismatch=True):
    """Concatenate list of :class:`iris.Cube` instances cubes into single Cube
    
    Helper method for concatenating a list of cubes. It handles the fact
    that the corresponding iris method is not well defined with respect to
    its return type (i.e. it returns an instance of :class:`Cube` or
    :class:`CubeList`, depending on whether or not all cubes could be
    concatenated).
    
    This method is not supposed to be called directly but rather 
    :func:`concatenate_cubes` (which ALWAYS returns instance of 
    :class:`Cube` or raises Exception) or :func:`concatenate_possible_cubes`
    (which ALWAYS returns instance of :class:`CubeList` or raises Exception)
    
    Parameters
    ----------
    cubes : CubeList
        list of individual cubes
    error_on_mismatch : bool
        if True, an Exception is raised in case the input cubes cannot be
        concatenated (e.g. due to mismatching metadata)
        
    Returns
    -------
    :obj:`Cube` or :obj:`CubeList`
        result of concatenation 
    
    Raises
    ------
    iris.exceptions.ConcatenateError
        if ``error_on_mismatch=True`` and the input cubes could not all be
        concatenated into a single instance of the :class:`iris.Cube` class.
        
    """
    var_name = cubes[0].var_name
    if const.GRID_IO.EQUALISE_METADATA:
        meta_init = cubes[0].metadata
        if not all([x.metadata == meta_init for x in cubes]):
            logger.warning("{} cubes to be concatenated have different meta "
                           "data settings. These will be unified using the "
                           "metadata dictionary of the first cube "
                           "(otherwise the method concatenate of the iris "
                           "package won't work)".format(var_name))
            for cube in cubes:
                cube.metadata = meta_init
                
    #now put the CubeList together and form one cube
    #1st equalise the cubes (remove non common attributes)
    equalise_attributes(cubes)
    #unify time units
    iris.util.unify_time_units(cubes)
    
    #now concatenate the cube list to one cube
    cubes_concat = iris._concatenate.concatenate(cubes, error_on_mismatch)
    
    return cubes_concat[0]
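
A hedged usage sketch of the same idea with iris' public API (CubeList.concatenate_cube rather than the private helper used above); the cubes are fabricated here purely to show concatenation along the time dimension:

import numpy as np
from iris.coords import DimCoord
from iris.cube import Cube, CubeList

def make_cube(t0):
    # three daily time steps starting at day offset t0
    time = DimCoord(np.arange(t0, t0 + 3), standard_name='time',
                    units='days since 2000-01-01')
    return Cube(np.zeros(3), var_name='od550aer',
                dim_coords_and_dims=[(time, 0)])

cubes = CubeList([make_cube(0), make_cube(3)])
merged = cubes.concatenate_cube()  # single Cube spanning both time ranges
print(merged.shape)  # (6,)
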
Example #7
def calc_distance(lat0, lon0, lat1, lon1, alt0=None, alt1=None,
                  auto_altitude_srtm=False):
    """Calculate distance between two coordinates
    
    Parameters
    ----------
    lat0 : float
        latitude of first point in decimal degrees
    lon0 : float
        longitude of first point in decimal degrees
    lat1 : float
        latitude of second point in decimal degrees
    lon1 : float
        longitude of second point in decimal degrees
    alt0 : :obj:`float`, optional
        altitude of first point in m
    alt1 : :obj:`float`, optional
        altitude of second point in m
    auto_altitude_srtm : bool
        if True, then all altitudes that are unspecified are set to the 
        corresponding topographic altitude of that coordinate, using SRTM 
        (requires geonum to be available and works only for coordinates where
        SRTM topographic data is accessible).
        
    Returns
    -------
    float
        distance between points in km
    """
    if not GEONUM_AVAILABLE and auto_altitude_srtm:
        raise ModuleNotFoundError('geonum library is required for accessing '
                                  'topographic altitude using the SRTM database')
    if GEONUM_AVAILABLE:
        import geonum
        p0 = geonum.GeoPoint(lat0, lon0, alt0, 
                             auto_topo_access=auto_altitude_srtm)
        p1 = geonum.GeoPoint(lat1, lon1, alt1, 
                             auto_topo_access=auto_altitude_srtm)
        if auto_altitude_srtm:
            if p0.altitude_err == p0._ALTERR_DEFAULT:
                raise ValueError('Failed to access topographic height for coord '
                                 '{} using SRTM topographic database'.format(p0))
            elif p1.altitude_err == p1._ALTERR_DEFAULT:
                raise ValueError('Failed to access topographic height for coord '
                                 '{} using SRTM topographic database'.format(p1))
        return (p0 - p1).magnitude
    else:
        logger.warning('geonum is not installed, computing approximate '
                       'distance using haversine formula')
        hordist = haversine(lat0, lon0, lat1, lon1)
        if alt0 is None:
            alt0 = 0
        if alt1 is None:
            alt1 = 0
        return np.linalg.norm((hordist, (alt0 - alt1)/1000))
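
The fallback branch relies on the haversine great-circle distance; a minimal standalone sketch of that formula (the mean Earth radius of 6371 km is an assumption):

import numpy as np

def haversine_km(lat0, lon0, lat1, lon1, earth_radius_km=6371.0):
    """Great-circle distance between two points in km."""
    lat0, lon0, lat1, lon1 = map(np.radians, (lat0, lon0, lat1, lon1))
    a = (np.sin((lat1 - lat0) / 2) ** 2
         + np.cos(lat0) * np.cos(lat1) * np.sin((lon1 - lon0) / 2) ** 2)
    return 2 * earth_radius_km * np.arcsin(np.sqrt(a))

print(round(haversine_km(0, 0, 0, 1), 1))  # ~111.2 km per degree of longitude at the equator
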
Example #8
 def to_dataframe(self):
     """Convert this object into pandas.DataFrame
     
     Note
     ----
     This does not include meta information
     """
     logger.warning('This method is currently not completely finished')
     model_vals = self.data.values[1].flatten()
     obs_vals = self.data.values[0].flatten()
     mask = ~np.isnan(obs_vals)
     return pd.DataFrame({'ref': obs_vals[mask], 'data': model_vals[mask]})
Example #9
 def _check_aliases(self, varname, conf_reader):
     for section, item in conf_reader.items():
         if 'aliases' in item:
             if varname in [x.strip() for x in item['aliases'].split(',')]:
                 logger.warning(
                     'Found alias match ({}) for variable {}. '
                     'Note that searching for aliases slows things '
                     'down, so please consider using the actual '
                     'aerocom variable name'.format(varname, section))
                 return section
     raise IOError(
         'No alias match could be found for variable {}'.format(varname))
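
The lookup above boils down to scanning an ini file for an 'aliases' entry in each variable section; a self-contained sketch with configparser (the ini content is illustrative):

from configparser import ConfigParser

cfg = ConfigParser()
cfg.read_string("""
[od550aer]
aliases = od550, aot550
""")

def resolve_alias(varname):
    for section, item in cfg.items():
        if 'aliases' in item and varname in [x.strip() for x in item['aliases'].split(',')]:
            return section
    raise KeyError('No alias match found for {}'.format(varname))

print(resolve_alias('aot550'))  # od550aer
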
Example #10
def _check_correct_time_dim(cube, file, file_convention=None):
    if file_convention is None:
        try:
            file_convention = FileConventionRead(from_file=file)
        except Exception:
            pass

    if not isinstance(file_convention, FileConventionRead):
        raise FileConventionError(
            'Unknown file convention: {}'.format(file_convention))

    finfo = file_convention.get_info_from_file(file)
    try:
        ts_type = TsType(finfo['ts_type'])
    except Exception:
        raise FileConventionError(
            'Invalid ts_type in file: {}'.format(finfo['ts_type']))
    year = finfo['year']

    if not const.MIN_YEAR <= year <= const.MAX_YEAR:
        raise FileConventionError('Invalid year in file: {}'.format(year))
    try:
        check_time_coord(cube, ts_type, year)
    except UnresolvableTimeDefinitionError as e:
        raise UnresolvableTimeDefinitionError(repr(e))
    except Exception:
        msg = ("Invalid time dimension coordinate in file {}. ".format(
            os.path.basename(file)))
        logger.warning(msg)
        if const.GRID_IO.CORRECT_TIME_FILENAME:
            logger.warning("Attempting to correct time coordinate "
                           "using information in file name")
            try:
                cube = correct_time_coord(cube,
                                          ts_type=finfo["ts_type"],
                                          year=finfo["year"])
            except Exception:
                pass
        if const.WRITE_FILEIO_ERR_LOG:
            add_file_to_log(file, 'Invalid time dimension')
    return cube
Example #11
def plot_map(data, *args, **kwargs):
    """Map plot of grid data
    
    Note
    ----
    Deprecated name of method. Please use :func:`plot_griddeddata_on_map` in 
    the future.
    
    Parameters
    ----------
    data
        data (2D numpy array or instance of GriddedData class; the latter is
        deprecated, but will continue to work)
    *args, **kwargs
        See :func:`plot_griddeddata_on_map`
    
    Returns
    -------
    See :func:`plot_griddeddata_on_map`
    """
    from pyaerocom import print_log, GriddedData
    print_log.warning(
        DeprecationWarning('Method name plot_map is deprecated. '
                           'Please use plot_griddeddata_on_map'))
    if isinstance(data, GriddedData):
        if 'time' in data and len(data['time'].points) > 1:
            logger.warning(
                "Input data contains more than one time stamp, using "
                "first time stamp")
            data = data[0]
        if not all([x in data for x in ('longitude', 'latitude')]):
            raise AttributeError(
                'GriddedData does not contain either longitude '
                'or latitude coordinates')
        return plot_griddeddata_on_map(data.grid.data, data.longitude.points,
                                       data.latitude.points, *args, **kwargs)
    return plot_griddeddata_on_map(data, *args, **kwargs)
Example #12
def load_cube_custom(file, var_name=None, grid_io=None,
                     file_convention=None):
    """Load netcdf file as iris.Cube
    
    Parameters
    ----------
    file : str
        netcdf file
    var_name : str
        name of variable to read
    grid_io : optional
        grid I/O settings; if None, :attr:`const.GRID_IO` is used
    file_convention : :obj:`FileConventionRead`, optional
        Aerocom file convention. If provided, then the data content (e.g. 
        dimension definitions) is tested against definition in file name.
    
    Returns
    -------
    iris.cube.Cube
        loaded data as Cube
    """
    if grid_io is None:
        grid_io = const.GRID_IO
    cube_list = iris.load(file)
    _num = len(cube_list)
    if _num != 1:
        if _num == 0:
            raise NetcdfError('Data from file {} could not be loaded using iris'
                              .format(file))
        else:
            logger.warning('File {} contains more than one data '
                           'field: {}'.format(file, cube_list))
    cube = None
    if var_name is None:
        if not len(cube_list) == 1:
            vars_avail = [c.var_name for c in cube_list]
            raise NetcdfError('Could not load single cube from {}. Please '
                              'specify var_name. Input file contains the '
                              'following variables: {}'.format(file, 
                                                               vars_avail))
        cube = cube_list[0]
        var_name = cube.var_name
    else:
        for c in cube_list:
            if c.var_name == var_name:
                cube = c
    if cube is None:
        raise NetcdfError('Variable {} not available in file {}'.format(var_name, 
                                                                        file))
    if file_convention is None:
        try:
            file_convention = FileConventionRead(from_file=file)
        except Exception:
            pass
    
    if isinstance(file_convention, FileConventionRead):
        finfo = file_convention.get_info_from_file(file)
        if grid_io.CHECK_TIME_FILENAME:
            if not check_time_coord(cube, ts_type=finfo["ts_type"],
                                    year=finfo["year"]):
                msg = ("Invalid time dimension coordinate in file {}. "
                       .format(os.path.basename(file)))
                logger.warning(msg)
                if grid_io.CORRECT_TIME_FILENAME:
                    logger.warning("Attempting to correct time coordinate "
                                   "using information in file name")
                    cube = correct_time_coord(cube, 
                                              ts_type=finfo["ts_type"],
                                              year=finfo["year"]) 
                if const.WRITE_FILEIO_ERR_LOG:
                    add_file_to_log(file, 'Invalid time dimension')
        else:
            logger.warning("WARNING: Automatic check of time "
                           "array in netCDF files is deactivated. "
                           "This may cause problems in case "
                           "the time dimension is not CF conform.")
    
    if grid_io.CHECK_DIM_COORDS:
        cube = check_dim_coords_cube(cube)
    
    try:
        if grid_io.DEL_TIME_BOUNDS:
            cube.coord("time").bounds = None
    except Exception:
        logger.warning("Failed to access time coordinate in GriddedData")
        
    if grid_io.SHIFT_LONS:
        cube = check_and_regrid_lons_cube(cube)
    return cube
Example #13
def check_time_coord(cube, ts_type, year):
    """Method that checks the time coordinate of an iris Cube
    
    This method checks if the time dimension of a cube is accessible and 
    according to the standard (i.e. fully usable). It only checks, and does not
    correct. For the latter, please see :func:`correct_time_coord`.
    
    Parameters
    ----------
    cube : Cube
        cube containing data
    ts_type : str
        temporal resolution of data (e.g. "hourly", "daily"). This information
        is typically encoded in the filename of a NetCDF file and may be
        accessed using :class:`pyaerocom.io.FileConventionRead`
    year : int
        integer specifying year of observation, e.g. 2017
    
    Returns
    -------
    bool
        True, if time dimension is ok, False if not
    """
    
    ok = True
    test_idx = [0,1,2,7] #7, since last accessible index in a 3hourly dataset of one day is 7
    try:
        try:
            t = cube.coord("time")
        except Exception:
            raise AttributeError("Cube does not contain time dimension")
        if not isinstance(t, iris.coords.DimCoord):
            raise AttributeError("Time is not a DimCoord instance")
        try:
            cftime_to_datetime64(0, cfunit=t.units)
        except Exception:
            raise ValueError("Could not convert time unit string")
        tres_np = TSTR_TO_NP_TD[ts_type]
        conv = TSTR_TO_NP_DT[ts_type]
        base = datetime64("%s-01-01 00:00:00" %year).astype(conv)
        test_datenums = asarray(test_idx)
        ts_nominal = base + test_datenums.astype(tres_np)
        dts_nominal = ts_nominal[1:] - ts_nominal[:-1]
        ts_values = cftime_to_datetime64(t[test_idx].points, cfunit=t.units).astype(conv)
        dts_values = ts_values[1:] - ts_values[:-1]
        if not all(ts_values == ts_nominal):
            raise ValueError("Time match error, nominal dates for test array "
                             "%s (unit=%s): %s\nReceived values after "
                             "conversion: %s"
                             %(test_datenums, t.units.origin,
                               ts_nominal, ts_values))
        elif not all(dts_values == dts_nominal):
            raise ValueError("Time match error, time steps for test array "
                             "%s (unit=%s): %s\nReceived values after "
                             "conversion: %s"
                             %(test_datenums, t.units.origin,
                               dts_nominal, dts_values))
    except Exception as e:
        logger.warning("Invalid time dimension.\n"
                       "Error message: {}".format(repr(e)))
        ok = False
    return ok
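
The nominal timestamps that the file values are compared against are built with plain numpy datetime arithmetic; a minimal sketch for a daily dataset (the year and test indices mirror the defaults above):

import numpy as np

year = 2017
test_idx = np.asarray([0, 1, 2, 7])
base = np.datetime64('{}-01-01'.format(year), 'D')
ts_nominal = base + test_idx.astype('timedelta64[D]')
print(ts_nominal)  # ['2017-01-01' '2017-01-02' '2017-01-03' '2017-01-08']
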
Example #14
 def check_and_load(self, var_name):
     """Check if cache file exists and load
     
     Note
     ----
     If a cache file exists for this database, but cannot be loaded or is
     outdated against pyaerocom updates, then it will be removed (the latter
     only if :attr:`pyaerocom.const.RM_CACHE_OUTDATED` is True).
     
     Returns
     -------
     bool
         True, if cache file exists and could be successfully loaded, else
         False. Note: if import is successful, the corresponding data object
         (instance of :class:`pyaerocom.UngriddedData`) can be accessed via
         :attr:`loaded_data`
         
     Raises
     ------
     TypeError
         if cached file is not an instance of :class:`pyaerocom.UngriddedData` 
         class (which should not happen)
     """
     try:
         fp = self.file_path(var_name)
     except FileNotFoundError as e:
         logger.warning(repr(e))
         return False
     
     if not os.path.isfile(fp):
         logger.info('No cache file available for {}, {}'
                     .format(self.dataset_to_read, var_name))
          return False

      delete_existing = const.RM_CACHE_OUTDATED

      in_handle = open(fp, 'rb')

     try:
         ok = self._check_pkl_head_vs_database(in_handle)
     except Exception as e:
         ok = False
         delete_existing = True
          logger.exception('File error in cached data file {}. File will '
                           'be removed and data reloaded. '
                           'Error: {}'.format(fp, repr(e)))
     if not ok:
         # TODO: Should we delete the cache file if it is outdated ???
         logger.info('Aborting reading cache file {}. Aerocom database '
                     'or pyaerocom version has changed compared to '
                     'cached version'
                     .format(self.file_name(var_name)))
         in_handle.close()
         if delete_existing: #something was wrong
             const.print_log.info('Deleting outdated cache file: {}'
                                  .format(fp))
             os.remove(self.file_path(var_name))
         return False
     
     # everything is okay
     data = pickle.load(in_handle)
     if not isinstance(data, UngriddedData):
         raise TypeError('Unexpected data type stored in cache file, need '
                         'instance of UngriddedData, got {}'
                         .format(type(data)))
         
     self.loaded_data[var_name] = data
     logger.info('Successfully loaded data for {} from Cache'
                 .format(self.dataset_to_read))
     return True
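
A hedged sketch of the header-then-payload cache pattern used above: a small header object is pickled in front of the payload, so the header can be validated before the (potentially large) data object is unpickled. The file name and header keys are illustrative, not the reader's actual format:

import os
import pickle
import tempfile

CURRENT_VERSION = '0.10.0'  # stand-in for the pyaerocom / database version info

def write_cache(path, data):
    with open(path, 'wb') as f:
        pickle.dump({'version': CURRENT_VERSION}, f)  # header
        pickle.dump(data, f)                          # payload

def read_cache(path):
    with open(path, 'rb') as f:
        head = pickle.load(f)
        if head.get('version') != CURRENT_VERSION:
            return None  # outdated cache -> caller reloads from source
        return pickle.load(f)

tmp = os.path.join(tempfile.gettempdir(), 'demo_cache.pkl')
write_cache(tmp, {'var': 'od550aer'})
print(read_cache(tmp))  # {'var': 'od550aer'}
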
Example #15
    def _browse(self, name_or_pattern, ignorecase=True, return_if_match=True):
        """Search all Aerocom data directories that match input name or pattern

        Note
        ----
        Please do not use this function but either

        Parameters
        ----------
        name_or_pattern : str
            name or pattern of data (can be model or obs data)
        ignorecase : bool
            if True, upper / lower case is ignored
        return_if_match : bool
            if True, then the data directory is returned as string, if it can
            be found, else, only a list is returned that contains all
            matches. The latter takes longer since the whole database is
            searched.

        Returns
        -------
        :obj:`str` or :obj:`list`
            Data directory (str, if ``return_if_match`` is True) or list
            containing valid Aerocom names (which can then be used to
            retrieve the paths)

        Raises
        ------
        DataSearchError
            if no match or no unique match can be found
        """
        pattern = fnmatch.translate(name_or_pattern)
        _candidates = []
        _msgs = []
        _warnings = []

        for obs_id, obs_path in const.OBSLOCS_UNGRIDDED.items():
            if ignorecase:
                match = name_or_pattern.lower() == obs_id.lower()
            else:
                match = name_or_pattern == obs_id
            if match:
                logger.info("Found match for search pattern in obs network "
                            "directories {}".format(obs_id))
                path = os.path.normpath(obs_path)
                if os.path.exists(path):
                    self[obs_id] = path
                    if return_if_match:
                        return path
            else:
                if ignorecase:
                    match = bool(re.search(pattern, obs_id, re.IGNORECASE))
                else:
                    match = bool(re.search(pattern, obs_id))
                if match:
                    path = os.path.normpath(obs_path)
                    if os.path.exists(path):
                        self[obs_id] = path
                        _candidates.append(obs_id)
                        if return_if_match:
                            return path

        for search_dir in const.DATA_SEARCH_DIRS:
            # get the directories
            if os.path.isdir(search_dir):
                #subdirs = listdir(search_dir)
                subdirs = [
                    x for x in os.listdir(search_dir)
                    if os.path.isdir(os.path.join(search_dir, x))
                ]
                for subdir in subdirs:
                    if ignorecase:
                        match = bool(re.search(pattern, subdir, re.IGNORECASE))
                    else:
                        match = bool(re.search(pattern, subdir))
                    if match:
                        _dir = os.path.normpath(
                            os.path.join(search_dir, subdir))
                        _rnsubdir = os.path.join(_dir, "renamed")
                        if os.path.isdir(_rnsubdir):
                            logger.info(
                                "{} has subdir renamed. Using that one".format(
                                    _dir))
                            _dir = _rnsubdir
                        if any([_dir in x for x in self.values()]):
                            # directory was already found before
                            continue
                        # append name of candidate ...
                        _candidates.append(subdir)
                        # ... and the corresponding data directory
                        self[subdir] = _dir

                        # now check if it is actually an exact match, if
                        # applicable
                        if return_if_match:

                            if ignorecase:
                                match = name_or_pattern.lower(
                                ) == subdir.lower()
                            else:
                                match = name_or_pattern == subdir
                            if match:
                                logger.info("Found match for ID {}".format(
                                    name_or_pattern))
                                if return_if_match:
                                    return _dir

            else:
                _msgs.append('directory %s does not exist\n' % search_dir)
        for msg in _msgs:
            logger.info(msg)

        for warning in _warnings:
            logger.warning(warning)

        if len(_candidates) == 0:
            raise DataSearchError(
                'No matches could be found for search pattern '
                '{}'.format(name_or_pattern))
        if return_if_match:
            if len(_candidates) == 1:
                logger.info("Found exactly one match for search pattern "
                            "{}: {}".format(name_or_pattern, _candidates[0]))
                return self[_candidates[0]]
            raise DataSearchError(
                'Found multiple matches for search pattern {}. '
                'Please choose from {}'.format(name_or_pattern, _candidates))
        return _candidates
Example #16
 def update(self, **kwargs):
     for k, v in kwargs.items():
         try:
             self[k] = v
         except Exception:
             logger.warning("Invalid attribute: {}".format(k))
Example #17
def _calc_od_helper(data,
                    var_name,
                    to_lambda,
                    od_ref,
                    lambda_ref,
                    od_ref_alt=None,
                    lambda_ref_alt=None,
                    use_angstrom_coeff='ang4487aer'):
    """Helper method for computing ODs
    
    Parameters
    ----------
    data : dict-like
        data object containing loaded results used to compute the ODs at a new
        wavelength
    var_name : str
        name of variable that is supposed to be computed (is used in order to 
        see whether a global lower threshold is defined for this variable and
        if this is the case, all computed values that are below this threshold
        are replaced with NaNs)
    to_lambda : float
        wavelength of computed AOD
    od_ref : :obj:`float` or :obj:`ndarray`
        reference AOD
    lambda_ref : :obj:`float` or :obj:`ndarray`
        wavelength corresponding to reference AOD
    od_ref_alt : :obj:`float` or :obj:`ndarray`, optional
        alternative reference AOD (used for data points where the former is
        invalid)
    lambda_ref_alt : :obj:`float` or :obj:`ndarray`, optional
        wavelength corresponding to alternative reference AOD
    use_angstrom_coeff : str
        name of Angstrom coefficient in data, that is used for computation
        
    Returns
    -------
    :obj:`float` or :obj:`ndarray`
        AOD(s) at shifted wavelength
        
    Raises
    ------
    AttributeError
        if neither ``od_ref`` nor ``od_ref_alt`` are available in data, or if
        ``use_angstrom_coeff`` is missing
    """
    if od_ref not in data:
        logger.warning('Reference OD at {} nm is not available in data, '
                       'checking alternative'.format(lambda_ref))
        if od_ref_alt is None or od_ref_alt not in data:
            raise AttributeError('No alternative OD found for computation of '
                                 '{}'.format(var_name))
        return compute_od_from_angstromexp(
            to_lambda=to_lambda,
            od_ref=data[od_ref_alt],
            lambda_ref=lambda_ref_alt,
            angstrom_coeff=data[use_angstrom_coeff])
    elif use_angstrom_coeff not in data:
        raise AttributeError("Angstrom coefficient (440-870 nm) is not "
                             "available in provided data")
    result = compute_od_from_angstromexp(
        to_lambda=to_lambda,
        od_ref=data[od_ref],
        lambda_ref=lambda_ref,
        angstrom_coeff=data[use_angstrom_coeff])
    # optional if available
    if od_ref_alt in data:
        # fill up time steps that are nans with values calculated from the
        # alternative wavelength to minimise gaps in the time series
        mask = np.argwhere(np.isnan(result))

        if len(mask) > 0:  #there are nans
            ods_alt = data[od_ref_alt][mask]
            ang = data[use_angstrom_coeff][mask]
            replace = compute_od_from_angstromexp(to_lambda=to_lambda,
                                                  od_ref=ods_alt,
                                                  lambda_ref=lambda_ref_alt,
                                                  angstrom_coeff=ang)
            result[mask] = replace

    try:
        # now replace all values with NaNs that are below the global lower threshold
        below_thresh = result < const.VAR_PARAM[var_name]['minimum']
        result[below_thresh] = np.nan
    except Exception:
        logger.warning("Could not access lower limit from global settings for "
                       "variable {}".format(var_name))

    return result
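
For context, such OD conversions are based on the standard Angstrom-exponent relation od(lambda) = od(lambda_ref) * (lambda / lambda_ref)**(-alpha); whether compute_od_from_angstromexp implements exactly this is an assumption, but a minimal sketch looks like:

def od_from_angstrom(to_lambda, od_ref, lambda_ref, angstrom_coeff):
    """AOD shifted to to_lambda via the Angstrom power law."""
    return od_ref * (to_lambda / lambda_ref) ** (-angstrom_coeff)

# e.g. shift an AOD of 0.2 from 500 nm to 550 nm with alpha = 1.5
print(round(od_from_angstrom(550, 0.2, 500, 1.5), 4))  # ~0.1734
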
Example #18
    def _browse(self, name_or_pattern, ignorecase=True, return_if_match=True):
        """Search all Aerocom data directories that match input name or pattern
        
        Note
        ----
        Please do not use this function but either

        Parameters
        ----------
        name_or_pattern : str
            name or pattern of data (can be model or obs data)
        ignorecase : bool
            if True, upper / lower case is ignored
        return_if_match : bool
            if True, then the data directory is returned as string, if it can
            be found, else, only a list is returned that contains all 
            matches. The latter takes longer since the whole database is 
            searched.
            
        Returns
        -------
        :obj:`str` or :obj:`list`
            Data directory (str, if ``return_if_match`` is True) or list 
            containing valid Aerocom names (which can then be used to 
            retrieve the paths)
            
        Raises
        ------
        DataSearchError
            if no match or no unique match can be found
        """
        pattern = fnmatch.translate(name_or_pattern)
        _candidates = []
        _msgs = []
        _warnings = []

        for obs_id in const.OBS_IDS:
            if ignorecase:
                match = name_or_pattern.lower() == obs_id.lower()
            else:
                match = name_or_pattern == obs_id
            if match:
                logger.info("Found match for search pattern in obs network "
                            "directories {}".format(obs_id))
                self[obs_id] = const.OBSCONFIG[obs_id]["PATH"]
                if return_if_match:
                    return self[obs_id]
            else:
                if ignorecase:
                    match = bool(re.search(pattern, obs_id, re.IGNORECASE))
                else:
                    match = bool(re.search(pattern, obs_id))
            if match:
                self[obs_id] = const.OBSCONFIG[obs_id]["PATH"]
                _candidates.append(obs_id)

        for search_dir in const.MODELDIRS:
            # get the directories
            if isdir(search_dir):
                #subdirs = listdir(search_dir)
                subdirs = [
                    x for x in listdir(search_dir)
                    if isdir(join(search_dir, x))
                ]
                for subdir in subdirs:
                    if ignorecase:
                        match = bool(re.search(pattern, subdir, re.IGNORECASE))
                    else:
                        match = bool(re.search(pattern, subdir))
                    if match:
                        _dir = join(search_dir, subdir)
                        _rnsubdir = join(_dir, "renamed")
                        if isdir(_rnsubdir):
                            logger.info(
                                "{} has subdir renamed. Using that one".format(
                                    _dir))
                            _dir = _rnsubdir


# =============================================================================
#                         ok = True
#                         if const.GRID_IO.USE_RENAMED_DIR:
#                             logger.info("Checking if renamed directory exists")
#                             _dir = join(_dir, "renamed")
#                             if not isdir(_dir):
#                                 ok = False
#                                 _warnings.append("Renamed folder does not exist "
#                                                  "in {}".format(join(search_dir,
#                                                      subdir)))
#                         # directory exists and is candidate since it matches
#                         # the pattern
#                         if ok:
# =============================================================================
                        # append name of candidate ...
                        _candidates.append(subdir)
                        # ... and the corresponding data directory
                        self[subdir] = _dir

                        # now check if it is actually an exact match, if
                        # applicable
                        if return_if_match:

                            if ignorecase:
                                match = name_or_pattern.lower(
                                ) == subdir.lower()
                            else:
                                match = name_or_pattern == subdir
                            if match:
                                logger.info("Found match for ID {}".format(
                                    name_or_pattern))
                                if return_if_match:
                                    return _dir

            else:
                _msgs.append('directory %s does not exist\n' % search_dir)
        for msg in _msgs:
            logger.info(msg)

        for warning in _warnings:
            logger.warning(warning)

        if len(_candidates) == 0:
            raise DataSearchError(
                'No matches could be found for search pattern '
                '{}'.format(name_or_pattern))
        if return_if_match:
            if len(_candidates) == 1:
                logger.info("Found exactly one match for search pattern "
                            "{}: {}".format(name_or_pattern, _candidates[0]))
                return self[_candidates[0]]
            raise DataSearchError(
                'Found multiple matches for search pattern {}. '
                'Please choose from {}'.format(name_or_pattern, _candidates))
        return _candidates
Example #19
    def parse_from_ini(self, var_name=None, var_name_alt=None, cfg=None):
        """Import information about default region
        
        Parameters
        ----------
        var_name : str
            variable name
        var_name_alt : str
            alternative variable name that is used if variable name is not
            available
        cfg : ConfigParser
            open config parser object
            
        Returns
        -------
        bool
            True, if default could be loaded, False if not
        
        Raises
        ------
        IOError
            if the variable configuration (ini) file does not exist
        """
        if cfg is None:
            cfg = self.read_config()
        var_info = {}
        if var_name is not None and var_name != 'DEFAULT':
            if var_name in cfg:
                logger.info("Found default configuration for variable "
                            "{}".format(var_name))
                var_info = cfg[var_name]
                #self.var_name = var_name
            elif isinstance(var_name_alt, str) and var_name_alt in cfg:
                var_info = cfg[var_name_alt]
            else:
                ap = parse_aliases_ini()
                aliases = _read_alias_ini(ap)
                if var_name in aliases:
                    var_name = aliases[var_name]
                    var_info = cfg[var_name]
                else:
                    try:
                        var_name = _check_alias_family(var_name, ap)
                        var_info = cfg[var_name]
                    except VariableDefinitionError:

                        logger.warning(
                            "No default configuration available for "
                            "variable {}. Using DEFAULT settings".format(
                                var_name))

        default = cfg['DEFAULT']

        for key in self.keys():
            if key in self.ALT_NAMES:
                if self.ALT_NAMES[key] in var_info:
                    self._add(key, var_info[self.ALT_NAMES[key]])
            elif key in var_info:
                self._add(key, var_info[key])
            elif key in default:
                self._add(key, default[key])

        self.var_name = var_name
Example #20
 def upper_limit(self):
     """Old attribute name for :attr:`minimum` (following HTAP2 defs)"""
     logger.warning(DeprecationWarning('Old name for attribute minimum'))
     return self.maximum
Example #21
def load_cube_custom(file,
                     var_name=None,
                     file_convention=None,
                     perform_fmt_checks=None):
    """Load netcdf file as iris.Cube
    
    Parameters
    ----------
    file : str
        netcdf file
    var_name : str
        name of variable to read
    file_convention : :obj:`FileConventionRead`, optional
        Aerocom file convention. If provided, then the data content (e.g. 
        dimension definitions) is tested against definition in file name
    perform_fmt_checks : bool
        if True, additional quality checks (and corrections) are (attempted to
        be) performed.
    
    Returns
    -------
    iris.cube.Cube
        loaded data as Cube
    """
    if perform_fmt_checks is None:
        perform_fmt_checks = const.GRID_IO.PERFORM_FMT_CHECKS
    cube_list = iris.load(file)

    _num = len(cube_list)
    if _num != 1:
        if _num == 0:
            raise NetcdfError(
                'Data from file {} could not be loaded using iris'.format(
                    file))
        else:
            logger.warning(
                'File {} contains more than one variable'.format(file))
    cube = None
    if var_name is None:
        if not len(cube_list) == 1:
            vars_avail = [c.var_name for c in cube_list]
            raise NetcdfError('Could not load single cube from {}. Please '
                              'specify var_name. Input file contains the '
                              'following variables: {}'.format(
                                  file, vars_avail))
        cube = cube_list[0]
        var_name = cube.var_name
    else:
        for c in cube_list:
            if c.var_name == var_name:
                cube = c
                break
    if cube is None:
        raise NetcdfError('Variable {} not available in file {}'.format(
            var_name, file))
    if perform_fmt_checks:
        try:
            cube = _check_var_unit_cube(cube)
        except VariableDefinitionError:
            pass

        grid_io = const.GRID_IO
        if grid_io.CHECK_TIME_FILENAME:
            try:
                cube = _check_correct_time_dim(cube, file, file_convention)
            except FileConventionError:
                const.print_log.warning('WARNING: failed to check / validate '
                                        'time dim. using information in '
                                        'filename. Reason: invalid file name '
                                        'convention')
        else:
            logger.warning("WARNING: Automatic check of time "
                           "array in netCDF files is deactivated. "
                           "This may cause problems in case "
                           "the time dimension is not CF conform.")
        if grid_io.CHECK_DIM_COORDS:
            cube = check_dim_coords_cube(cube)

        try:
            if grid_io.DEL_TIME_BOUNDS:
                cube.coord("time").bounds = None
        except Exception:
            logger.warning("Failed to access time coordinate in GriddedData")

        if grid_io.SHIFT_LONS:
            cube = check_and_regrid_lons_cube(cube)
    return cube
Example #22
    def read_file(self,
                  nasa_ames_file,
                  only_head=False,
                  replace_invalid_nan=True,
                  convert_timestamps=True,
                  decode_flags=True,
                  quality_check=True):
        """Read NASA Ames file
        
        Parameters
        ----------
        nasa_ames_file : str
            EBAS NASA Ames file
        only_head : bool
            read only file header
        replace_invalid_nan : bool
            replace all invalid values in the table by NaNs. The invalid values for
            each dependent data column are identified based on the information in 
            the file header.
        convert_timestamps : bool
            compute array of numpy datetime64 timestamps from numeric timestamps
            in data
        decode_flags : bool
            if True, all flags in all flag columns are decoded from floating 
            point representation to 3 integers, e.g. 
            0.111222333 -> 111 222 333
        quality_check : bool
            perform quality check after import (for details see 
            :func:`_quality_check`)
        """
        logger.info("Reading NASA Ames file:\n{}".format(nasa_ames_file))
        lc = 0  #line counter
        dc = 0  #data block line counter
        mc = 0  #meta block counter
        END_VAR_DEF = np.nan  #will be set (info stored in header)
        IN_DATA = False
        data = []
        _insert_invalid = None
        for line in open(nasa_ames_file):
            #print(lc, _NUM_FIXLINES, line)
            if IN_DATA:
                if dc == 0:
                    logger.debug(line)
                try:
                    data.append(
                        tuple([float(x.strip())
                               for x in line.strip().split()]))
                    #data.append([float(x.strip()) for x in line.strip().split()])
                except Exception as e:
                    data.append(_insert_invalid)
                    logger.warning("Failed to read data row {}. "
                                   "Error msg: {}".format(dc, repr(e)))
                dc += 1
            elif lc < self._NUM_FIXLINES:
                try:
                    val = self._H_FIXLINES_CONV[lc](line)
                    attr = self._H_FIXLINES_YIELD[lc]
                    if isinstance(attr, list):
                        for i, attr_id in enumerate(attr):
                            self[attr_id] = val[i]
                    else:
                        self[attr] = val
                except Exception as e:
                    msg = ("Failed to read header row {}.\n{}\n"
                           "Error msg: {}".format(lc, line, repr(e)))
                    if lc in self._HEAD_ROWS_MANDATORY:
                        raise NasaAmesReadError("Fatal: {}".format(msg))
                    else:
                        logger.warning(msg)
            else:
                _flagmap_idx = 0
                if mc == 0:
                    END_VAR_DEF = self._NUM_FIXLINES + self.num_cols_dependent - 1
                    NUM_HEAD_LINES = self.num_head_lines
                    try:
                        self.var_defs.append(self._read_vardef_line(line))
                    except Exception as e:
                        logger.warning(repr(e))

                elif lc < END_VAR_DEF:
                    var = self._read_vardef_line(line)
                    #if variable corresponds to flag column, assign this
                    #flag column to all previously read variables
                    if var.is_flag:
                        for _var in self.var_defs[_flagmap_idx:]:
                            _var.flag_id = var.name
                    self.var_defs.append(var)
                    _flagmap_idx = len(self.var_defs)
                    try:
                        pass
                        #self.var_defs.append(var)
                    except Exception as e:
                        logger.warning(repr(e))

                elif lc == NUM_HEAD_LINES - 1:
                    IN_DATA = True
                    self._data_header = h = [x.strip() for x in line.split()]
                    #append information of first two columns to variable
                    #definition array.
                    self._var_defs.insert(
                        0,
                        EbasColDef(name=h[0],
                                   is_flag=False,
                                   is_var=False,
                                   unit=self.time_unit))
                    self._var_defs.insert(
                        1,
                        EbasColDef(name=h[1],
                                   is_flag=False,
                                   is_var=False,
                                   unit=self.time_unit))
                    if only_head:
                        return
                    logger.debug("REACHED DATA BLOCK")
                    _insert_invalid = tuple([np.nan] * self.col_num)

                #elif lc > self._NUM_FIXLINES + 3:
                elif lc >= END_VAR_DEF + 2:
                    try:
                        name, val = line.split(":")
                        key = name.strip().lower().replace(" ", "_")
                        self.meta[key] = val.strip()
                    except Exception as e:
                        logger.warning("Failed to read line no. {}.\n{}\n"
                                       "Error msg: {}\n".format(
                                           lc, line, repr(e)))
                else:
                    logger.debug("Ignoring line no. {}: {}".format(lc, line))
                mc += 1
            lc += 1

        data = np.asarray(data)

        data[:, 1:] = data[:, 1:] * np.asarray(self.mul_factors)

        self._data = data
        if replace_invalid_nan:
            dep_dat = data[:, 1:]
            for i, val in enumerate(np.floor(self.vals_invalid)):
                try:
                    col = dep_dat[:, i]
                    cond = np.floor(col) == val
                    col[cond] = np.nan
                    dep_dat[:, i] = col
                except Exception:
                    logger.warning("Failed to replace invalid values with "
                                   "NaNs in column {}".format(
                                       self.col_names[i + 1]))
            data[:, 1:] = dep_dat
        self._data = data

        if convert_timestamps:
            try:
                self.compute_time_stamps()
            except Exception as e:
                logger.warning("Failed to compute time stamps.\n"
                               "Error message: {}".format(repr(e)))
        self.init_flags(decode_flags)
        if quality_check:
            self._quality_check()
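
The flag decoding mentioned in the docstring (one float such as 0.111222333 encoding up to three 3-digit integer flags) can be sketched in a few lines; this illustrates the idea rather than the reader's actual implementation:

def decode_flag(val, num_flags=3):
    """Decode e.g. 0.111222333 -> (111, 222, 333)."""
    digits = '{:.9f}'.format(val).split('.')[1]
    return tuple(int(digits[i * 3:(i + 1) * 3]) for i in range(num_flags))

print(decode_flag(0.111222333))  # (111, 222, 333)
print(decode_flag(0.247))        # (247, 0, 0)
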
Example #23
    def parse_from_ini(self, var_name=None, cfg=None):
        """Import information about default region
        
        Parameters
        ----------
        var_name : str
            string ID of region (must be specified in `regions.ini <https://
            github.com/metno/pyaerocom/blob/master/pyaerocom/data/regions.ini>`__
            file)
        cfg : ConfigParser
            open and read config parser object
            
        Returns
        -------
        bool
            True, if default could be loaded, False if not
        
        Raises
        ------
        IOError
            if regions.ini file does not exist
        """
        if cfg is None:
            cfg = self.read_config()

        var_info = {}
        if var_name is not None and var_name != 'DEFAULT':
            if var_name in cfg:
                logger.info("Found default configuration for variable "
                            "{}".format(var_name))
                var_info = cfg[var_name]
                self.var_name = var_name
            else:
                aliases = _read_alias_ini()
                if var_name in aliases:
                    var_info = cfg[aliases[var_name]]
                else:
                    logger.warning(
                        "No default configuration available for "
                        "variable {}. Using DEFAULT settings".format(var_name))

        default = cfg['DEFAULT']

        for key in self.keys():
            ok = True
            if key in var_info:
                val = var_info[key]
            elif key in default:
                val = default[key]
            else:
                ok = False
            if ok:
                if key in self._TYPE_CONV:
                    try:
                        val = self._TYPE_CONV[key](val)
                    except Exception:
                        pass
                elif key == 'unit':
                    if val == 'None' or val == '1':
                        val = 1
                if val == 'None':
                    val = None
                self[key] = val
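
The lookup pattern above (a per-variable section overriding configparser's built-in DEFAULT section) in a self-contained sketch; the section and option names are illustrative:

from configparser import ConfigParser

cfg = ConfigParser()
cfg.read_string("""
[DEFAULT]
unit = 1
minimum = -inf

[od550aer]
minimum = 0
""")

var_info = cfg['od550aer'] if 'od550aer' in cfg else cfg['DEFAULT']
print(var_info['minimum'])  # 0 (from the variable section)
print(var_info['unit'])     # 1 (falls back to DEFAULT)
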