def units(self):
    """Unit of data"""
    try:
        return self.data.attrs['var_units']
    except KeyError:
        logger.warning('Failed to access unit in ColocatedData class '
                       '(may be an old version of data)')

def data(self, val):
    if not isinstance(val, xarray.DataArray):
        raise IOError('Invalid input for data attribute, need instance '
                      'of xarray.DataArray')
    if self._data is not None:
        logger.warning('Overwriting existing data in ColocatedData object')
    self._data = val

def get_cmap_maps_aerocom(color_theme=None, vmin=None, vmax=None):
    """Get colormap using pyAeroCom color scheme

    Parameters
    ----------
    color_theme : :obj:`ColorTheme`, optional
        instance of pyaerocom color theme. If None, the default scheme is
        used
    vmin : :obj:`float`, optional
        lower end of value range
    vmax : :obj:`float`, optional
        upper end of value range

    Returns
    -------
    colormap
    """
    if color_theme is None:
        color_theme = COLOR_THEME
    if vmin is not None and vmax is not None and vmin < 0 and vmax > 0:
        cmap = get_cmap(color_theme.cmap_map_div)
        if color_theme.cmap_map_div_shifted:
            try:
                from geonum.helpers import shifted_color_map
                cmap = shifted_color_map(vmin, vmax, cmap)
            except ImportError:
                logger.warning('cannot shift colormap, need geonum '
                               'installation')
        return cmap
    return get_cmap(color_theme.cmap_map)

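# Usage sketch (illustrative value ranges): a diverging colormap is only
# returned if the range spans zero, otherwise the default map scheme is
# used:
#
#     cmap = get_cmap_maps_aerocom(vmin=-1, vmax=1)  # diverging (shifted)
#     cmap = get_cmap_maps_aerocom(vmin=0, vmax=1)   # sequential default
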
def _read_vardef_line(self, line_from_file):
    """Import variable definition line from NASA Ames file"""
    spl = [x.strip() for x in line_from_file.split(",")]
    name = spl[0]
    if len(spl) > 1:
        unit = spl[1]
    else:
        unit = ''
    data = EbasColDef(name=name,
                      is_flag=True,
                      is_var=False,
                      unit=unit)
    if "numflag" not in name:
        data.is_var = True
        data.is_flag = False
    for item in spl[2:]:
        if "=" in item:
            # e.g. wavelength=550nm
            sub = item.split("=")
            if len(sub) == 2:
                idf, val = [x.strip() for x in sub]
                data[idf.lower().replace(' ', '_')] = val
            else:
                logger.warning("Could not interpret part of column "
                               "definition in EBAS NASA Ames file: "
                               "{}".format(item))
        else:  # e.g. a trailing unit specification
            logger.warning("Failed to interpret {}".format(item))
    return data

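# Illustration of the parsing logic with a hypothetical EBAS column
# definition line (not taken from a real file; assumes EbasColDef supports
# attribute-style access to its keys):
#
#     line = "aerosol_absorption_coefficient, 1/Mm, wavelength=550nm"
#     coldef = self._read_vardef_line(line)
#     # coldef.name == 'aerosol_absorption_coefficient'
#     # coldef.unit == '1/Mm'
#     # coldef.wavelength == '550nm' (stored via the key=value split)
#     # coldef.is_var is True, since 'numflag' is not in the name
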
def _apply_gridded(self, data_obj):
    """Apply filter to instance of class :class:`GriddedData`"""
    logger.warning('Applying regional cropping in gridded data using '
                   'Filter class. Note that this does not yet include '
                   'potential cropping in the altitude dimension. '
                   'Coming soon...')
    return data_obj.crop(region=self._region)

def concatenate_iris_cubes(cubes, error_on_mismatch=True):
    """Concatenate list of :class:`iris.Cube` instances into a single Cube

    Helper method for concatenating a list of cubes that handles the fact
    that the corresponding iris method is not well defined in the sense of
    what it returns (i.e. an instance of :class:`Cube` or
    :class:`CubeList`, depending on whether all cubes could be
    concatenated or not...)

    This method is not supposed to be called directly but rather
    :func:`concatenate_cubes` (which ALWAYS returns an instance of
    :class:`Cube` or raises an Exception) or
    :func:`concatenate_possible_cubes` (which ALWAYS returns an instance
    of :class:`CubeList` or raises an Exception)

    Parameters
    ----------
    cubes : CubeList
        list of individual cubes
    error_on_mismatch : bool
        boolean specifying whether an Exception is supposed to be raised
        or not

    Returns
    -------
    :obj:`Cube` or :obj:`CubeList`
        result of concatenation

    Raises
    ------
    iris.exceptions.ConcatenateError
        if ``error_on_mismatch=True`` and the input cubes could not all be
        concatenated into a single instance of the :class:`iris.Cube` class
    """
    var_name = cubes[0].var_name
    if const.GRID_IO.EQUALISE_METADATA:
        meta_init = cubes[0].metadata
        if not all([x.metadata == meta_init for x in cubes]):
            logger.warning("{} cubes to be concatenated have different "
                           "metadata settings. These will be unified "
                           "using the metadata dictionary of the first "
                           "cube (otherwise the concatenate method of the "
                           "iris package won't work)".format(var_name))
            for cube in cubes:
                cube.metadata = meta_init
    # now put the CubeList together and form one cube
    # 1st equalise the cubes (remove non-common attributes)
    equalise_attributes(cubes)
    # unify time units
    iris.util.unify_time_units(cubes)
    # now concatenate the cube list to one cube
    cubes_concat = iris._concatenate.concatenate(cubes, error_on_mismatch)
    return cubes_concat[0]

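# Usage sketch (cube_jan and cube_feb are hypothetical cubes of the same
# variable on compatible grids):
#
#     from iris.cube import CubeList
#     merged = concatenate_iris_cubes(CubeList([cube_jan, cube_feb]),
#                                     error_on_mismatch=True)
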
def calc_distance(lat0, lon0, lat1, lon1, alt0=None, alt1=None,
                  auto_altitude_srtm=False):
    """Calculate distance between two coordinates

    Parameters
    ----------
    lat0 : float
        latitude of first point in decimal degrees
    lon0 : float
        longitude of first point in decimal degrees
    lat1 : float
        latitude of second point in decimal degrees
    lon1 : float
        longitude of second point in decimal degrees
    alt0 : :obj:`float`, optional
        altitude of first point in m
    alt1 : :obj:`float`, optional
        altitude of second point in m
    auto_altitude_srtm : bool
        if True, then all altitudes that are unspecified are set to the
        corresponding topographic altitude of that coordinate, using SRTM
        (requires geonum to be available and works only for coordinates
        where SRTM topographic data is accessible).

    Returns
    -------
    float
        distance between points in km
    """
    if not GEONUM_AVAILABLE and auto_altitude_srtm:
        raise ModuleNotFoundError('Require geonum library for accessing '
                                  'topographic altitude using SRTM '
                                  'database')
    if GEONUM_AVAILABLE:
        import geonum
        p0 = geonum.GeoPoint(lat0, lon0, alt0,
                             auto_topo_access=auto_altitude_srtm)
        p1 = geonum.GeoPoint(lat1, lon1, alt1,
                             auto_topo_access=auto_altitude_srtm)
        if auto_altitude_srtm:
            if p0.altitude_err == p0._ALTERR_DEFAULT:
                raise ValueError('Failed to access topographic height for '
                                 'coord {} using SRTM topographic '
                                 'database'.format(p0))
            elif p1.altitude_err == p1._ALTERR_DEFAULT:
                raise ValueError('Failed to access topographic height for '
                                 'coord {} using SRTM topographic '
                                 'database'.format(p1))
        return (p0 - p1).magnitude
    logger.warning('geonum is not installed, computing approximate '
                   'distance using haversine formula')
    hordist = haversine(lat0, lon0, lat1, lon1)
    if alt0 is None:
        alt0 = 0
    if alt1 is None:
        alt1 = 0
    return np.linalg.norm((hordist, (alt0 - alt1) / 1000))

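# Usage example (arbitrary illustration coordinates). Without geonum, the
# horizontal haversine distance and the altitude difference (converted to
# km) are combined via the Euclidean norm:
#
#     d = calc_distance(50.0, 8.0, 52.0, 10.0)               # horizontal
#     d = calc_distance(50.0, 8.0, 52.0, 10.0, alt0=0.0, alt1=2000.0)
#     # here the 2000 m altitude difference enters the norm as 2 km
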
def to_dataframe(self):
    """Convert this object into pandas.DataFrame

    Note
    ----
    This does not include meta information
    """
    logger.warning('This method is currently not completely finished')
    model_vals = self.data.values[1].flatten()
    obs_vals = self.data.values[0].flatten()
    mask = ~np.isnan(obs_vals)
    return pd.DataFrame({'ref': obs_vals[mask],
                         'data': model_vals[mask]})

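# Usage sketch (`coldata` is a hypothetical loaded ColocatedData instance;
# index 0 of the first data dimension holds the observations, index 1 the
# model values):
#
#     df = coldata.to_dataframe()
#     print(df['ref'].corr(df['data']))  # simple obs/model correlation
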
def _check_aliases(self, varname, conf_reader):
    for section, item in conf_reader.items():
        if 'aliases' in item:
            if varname in [x.strip() for x in item['aliases'].split(',')]:
                logger.warning('Found alias match ({}) for variable {}. '
                               'Note that searching for aliases slows '
                               'things down; please consider using the '
                               'actual aerocom variable '
                               'name'.format(varname, section))
                return section
    raise IOError('No alias match could be found for variable '
                  '{}'.format(varname))

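# Illustration with hypothetical ini content: the method returns the name
# of the section whose comma-separated 'aliases' entry contains the
# searched variable name:
#
#     [od550aer]
#     aliases = od550csaer, AOD550nm
#
#     # self._check_aliases('AOD550nm', conf_reader) -> 'od550aer'
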
def _check_correct_time_dim(cube, file, file_convention=None):
    if file_convention is None:
        try:
            file_convention = FileConventionRead(from_file=file)
        except Exception:
            pass
    if not isinstance(file_convention, FileConventionRead):
        raise FileConventionError('Unknown file convention: '
                                  '{}'.format(file_convention))
    finfo = file_convention.get_info_from_file(file)
    try:
        ts_type = TsType(finfo['ts_type'])
    except Exception:
        raise FileConventionError('Invalid ts_type in file: '
                                  '{}'.format(finfo['ts_type']))
    year = finfo['year']
    if not const.MIN_YEAR <= year <= const.MAX_YEAR:
        raise FileConventionError('Invalid year in file: {}'.format(year))
    try:
        check_time_coord(cube, ts_type, year)
    except UnresolvableTimeDefinitionError as e:
        raise UnresolvableTimeDefinitionError(repr(e))
    except Exception:
        msg = ('Invalid time dimension coordinate in file {}. '
               .format(os.path.basename(file)))
        logger.warning(msg)
        if const.GRID_IO.CORRECT_TIME_FILENAME:
            logger.warning('Attempting to correct time coordinate '
                           'using information in file name')
            try:
                cube = correct_time_coord(cube,
                                          ts_type=finfo['ts_type'],
                                          year=finfo['year'])
            except Exception:
                pass
        if const.WRITE_FILEIO_ERR_LOG:
            add_file_to_log(file, 'Invalid time dimension')
    return cube

def plot_map(data, *args, **kwargs):
    """Map plot of grid data

    Note
    ----
    Deprecated name of method. Please use
    :func:`plot_griddeddata_on_map` in the future.

    Parameters
    ----------
    data
        data (2D numpy array or instance of GriddedData class. The latter
        is deprecated, but will continue to work)
    *args, **kwargs
        See :func:`plot_griddeddata_on_map`

    Returns
    -------
    See :func:`plot_griddeddata_on_map`
    """
    from pyaerocom import print_log, GriddedData
    print_log.warning(DeprecationWarning('Method name plot_map is '
                                         'deprecated. Please use '
                                         'plot_griddeddata_on_map'))
    if isinstance(data, GriddedData):
        if 'time' in data and len(data['time'].points) > 1:
            logger.warning('Input data contains more than one time '
                           'stamp, using first time stamp')
            data = data[0]
        if not all([x in data for x in ('longitude', 'latitude')]):
            raise AttributeError('GriddedData does not contain either '
                                 'longitude or latitude coordinates')
        return plot_griddeddata_on_map(data.grid.data,
                                       data.longitude.points,
                                       data.latitude.points,
                                       *args, **kwargs)
    return plot_griddeddata_on_map(data, *args, **kwargs)

def load_cube_custom(file, var_name=None, grid_io=None,
                     file_convention=None):
    """Load netcdf file as iris.Cube

    Parameters
    ----------
    file : str
        netcdf file
    var_name : str
        name of variable to read
    grid_io : optional
        grid I/O settings used for the import. If None,
        :attr:`pyaerocom.const.GRID_IO` is used.
    file_convention : :obj:`FileConventionRead`, optional
        Aerocom file convention. If provided, then the data content (e.g.
        dimension definitions) is tested against definition in file name.

    Returns
    -------
    iris.cube.Cube
        loaded data as Cube
    """
    if grid_io is None:
        grid_io = const.GRID_IO
    cube_list = iris.load(file)
    _num = len(cube_list)
    if _num != 1:
        if _num == 0:
            raise NetcdfError('Data from file {} could not be loaded '
                              'using iris'.format(file))
        else:
            logger.warning('File {} contains more than one data '
                           'field: {}'.format(file, cube_list))
    cube = None
    if var_name is None:
        if not len(cube_list) == 1:
            vars_avail = [c.var_name for c in cube_list]
            raise NetcdfError('Could not load single cube from {}. '
                              'Please specify var_name. Input file '
                              'contains the following variables: '
                              '{}'.format(file, vars_avail))
        cube = cube_list[0]
        var_name = cube.var_name
    else:
        for c in cube_list:
            if c.var_name == var_name:
                cube = c
        if cube is None:
            raise NetcdfError('Variable {} not available in file '
                              '{}'.format(var_name, file))
    if file_convention is None:
        try:
            file_convention = FileConventionRead(from_file=file)
        except Exception:
            pass
    if isinstance(file_convention, FileConventionRead):
        finfo = file_convention.get_info_from_file(file)
        if grid_io.CHECK_TIME_FILENAME:
            if not check_time_coord(cube, ts_type=finfo["ts_type"],
                                    year=finfo["year"]):
                msg = ('Invalid time dimension coordinate in file {}. '
                       .format(os.path.basename(file)))
                logger.warning(msg)
                if grid_io.CORRECT_TIME_FILENAME:
                    logger.warning('Attempting to correct time '
                                   'coordinate using information in '
                                   'file name')
                    cube = correct_time_coord(cube,
                                              ts_type=finfo["ts_type"],
                                              year=finfo["year"])
                if const.WRITE_FILEIO_ERR_LOG:
                    add_file_to_log(file, 'Invalid time dimension')
        else:
            logger.warning('WARNING: Automatic check of time array in '
                           'netCDF files is deactivated. This may cause '
                           'problems in case the time dimension is not '
                           'CF conform.')
    if grid_io.CHECK_DIM_COORDS:
        cube = check_dim_coords_cube(cube)
    try:
        if grid_io.DEL_TIME_BOUNDS:
            cube.coord("time").bounds = None
    except Exception:
        logger.warning('Failed to access time coordinate in GriddedData')
    if grid_io.SHIFT_LONS:
        cube = check_and_regrid_lons_cube(cube)
    return cube

def check_time_coord(cube, ts_type, year):
    """Method that checks the time coordinate of an iris Cube

    This method checks if the time dimension of a cube is accessible and
    according to the standard (i.e. fully usable). It only checks, and
    does not correct. For the latter, please see
    :func:`correct_time_coord`.

    Parameters
    ----------
    cube : Cube
        cube containing data
    ts_type : str
        temporal resolution of data (e.g. "hourly", "daily"). This
        information is e.g. encoded in the filename of a NetCDF file and
        may be accessed using :class:`pyaerocom.io.FileConventionRead`
    year : int
        integer specifying year of observation, e.g. 2017

    Returns
    -------
    bool
        True, if time dimension is ok, False if not
    """
    ok = True
    # 7, since the last accessible index in a 3hourly dataset of one day
    # is 7
    test_idx = [0, 1, 2, 7]
    try:
        try:
            t = cube.coord("time")
        except Exception:
            raise AttributeError("Cube does not contain time dimension")
        if not isinstance(t, iris.coords.DimCoord):
            raise AttributeError("Time is not a DimCoord instance")
        try:
            cftime_to_datetime64(0, cfunit=t.units)
        except Exception:
            raise ValueError("Could not convert time unit string")
        tres_np = TSTR_TO_NP_TD[ts_type]
        conv = TSTR_TO_NP_DT[ts_type]
        base = datetime64("%s-01-01 00:00:00" % year).astype(conv)
        test_datenums = asarray(test_idx)
        ts_nominal = base + test_datenums.astype(tres_np)
        dts_nominal = ts_nominal[1:] - ts_nominal[:-1]
        ts_values = cftime_to_datetime64(t[test_idx].points,
                                         cfunit=t.units).astype(conv)
        dts_values = ts_values[1:] - ts_values[:-1]
        if not all(ts_values == ts_nominal):
            raise ValueError("Time match error, nominal dates for test "
                             "array %s (unit=%s): %s\nReceived values "
                             "after conversion: %s"
                             % (test_datenums, t.units.origin,
                                ts_nominal, ts_values))
        elif not all(dts_values == dts_nominal):
            raise ValueError("Time match error, time steps for test "
                             "array %s (unit=%s): %s\nReceived values "
                             "after conversion: %s"
                             % (test_datenums, t.units.origin,
                                dts_nominal, dts_values))
    except Exception as e:
        logger.warning("Invalid time dimension.\n"
                       "Error message: {}".format(repr(e)))
        ok = False
    return ok

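# Usage sketch (hypothetical monthly 2010 model file already loaded as an
# iris Cube): the check is read-only, so a failed check is typically
# followed by a call to correct_time_coord (as done in load_cube_custom):
#
#     if not check_time_coord(cube, ts_type='monthly', year=2010):
#         cube = correct_time_coord(cube, ts_type='monthly', year=2010)
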
def check_and_load(self, var_name):
    """Check if cache file exists and load

    Note
    ----
    If a cache file exists for this database, but cannot be loaded or is
    outdated against pyaerocom updates, then it will be removed (the
    latter only if :attr:`pyaerocom.const.RM_CACHE_OUTDATED` is True).

    Returns
    -------
    bool
        True, if cache file exists and could be successfully loaded, else
        False. Note: if import is successful, the corresponding data
        object (instance of :class:`pyaerocom.UngriddedData`) can be
        accessed via :attr:`loaded_data`

    Raises
    ------
    TypeError
        if cached file is not an instance of the
        :class:`pyaerocom.UngriddedData` class (which should not happen)
    """
    try:
        fp = self.file_path(var_name)
    except FileNotFoundError as e:
        logger.warning(repr(e))
        return False

    if not os.path.isfile(fp):
        logger.info('No cache file available for {}, {}'
                    .format(self.dataset_to_read, var_name))
        return False

    delete_existing = const.RM_CACHE_OUTDATED

    in_handle = open(fp, 'rb')
    try:
        ok = self._check_pkl_head_vs_database(in_handle)
    except Exception as e:
        ok = False
        delete_existing = True
        logger.exception('File error in cached data file {}. File will '
                         'be removed and data reloaded. '
                         'Error: {}'.format(fp, repr(e)))
    if not ok:
        # TODO: Should we delete the cache file if it is outdated ???
        logger.info('Aborting reading cache file {}. Aerocom database '
                    'or pyaerocom version has changed compared to '
                    'cached version'.format(self.file_name(var_name)))
        in_handle.close()
        if delete_existing:  # something was wrong
            const.print_log.info('Deleting outdated cache file: {}'
                                 .format(fp))
            os.remove(self.file_path(var_name))
        return False

    # everything is okay
    data = pickle.load(in_handle)
    if not isinstance(data, UngriddedData):
        raise TypeError('Unexpected data type stored in cache file, need '
                        'instance of UngriddedData, got {}'
                        .format(type(data)))

    self.loaded_data[var_name] = data
    logger.info('Successfully loaded data for {} from cache'
                .format(self.dataset_to_read))
    return True

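# Usage sketch (hypothetical: `cache` is assumed to be a cache handler
# instance bound to an ungridded obs dataset):
#
#     if cache.check_and_load('od550aer'):
#         data = cache.loaded_data['od550aer']
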
def _browse(self, name_or_pattern, ignorecase=True, return_if_match=True):
    """Search all Aerocom data directories that match input name or pattern

    Note
    ----
    Please do not use this function directly.

    Parameters
    ----------
    name_or_pattern : str
        name or pattern of data (can be model or obs data)
    ignorecase : bool
        if True, upper / lower case is ignored
    return_if_match : bool
        if True, then the data directory is returned as string, if it can
        be found, else, only a list is returned that contains all
        matches. The latter takes longer since the whole database is
        searched.

    Returns
    -------
    :obj:`str` or :obj:`list`
        Data directory (str, if ``return_if_match`` is True) or list
        containing valid Aerocom names (which can then be used to
        retrieve the paths)

    Raises
    ------
    DataSearchError
        if no match or no unique match can be found
    """
    pattern = fnmatch.translate(name_or_pattern)
    _candidates = []
    _msgs = []
    _warnings = []

    for obs_id, obs_path in const.OBSLOCS_UNGRIDDED.items():
        if ignorecase:
            match = name_or_pattern.lower() == obs_id.lower()
        else:
            match = name_or_pattern == obs_id
        if match:
            logger.info("Found match for search pattern in obs network "
                        "directories {}".format(obs_id))
            path = os.path.normpath(obs_path)
            if os.path.exists(path):
                self[obs_id] = path
                if return_if_match:
                    return path
        else:
            if ignorecase:
                match = bool(re.search(pattern, obs_id, re.IGNORECASE))
            else:
                match = bool(re.search(pattern, obs_id))
            if match:
                path = os.path.normpath(obs_path)
                if os.path.exists(path):
                    self[obs_id] = path
                    _candidates.append(obs_id)
                    if return_if_match:
                        return path

    for search_dir in const.DATA_SEARCH_DIRS:
        # get the directories
        if os.path.isdir(search_dir):
            subdirs = [x for x in os.listdir(search_dir)
                       if os.path.isdir(os.path.join(search_dir, x))]
            for subdir in subdirs:
                if ignorecase:
                    match = bool(re.search(pattern, subdir,
                                           re.IGNORECASE))
                else:
                    match = bool(re.search(pattern, subdir))
                if match:
                    _dir = os.path.normpath(os.path.join(search_dir,
                                                         subdir))
                    _rnsubdir = os.path.join(_dir, "renamed")
                    if os.path.isdir(_rnsubdir):
                        logger.info("{} has subdir renamed. Using that "
                                    "one".format(_dir))
                        _dir = _rnsubdir
                    if any([_dir in x for x in self.values()]):
                        # directory was already found before
                        continue
                    # append name of candidate ...
                    _candidates.append(subdir)
                    # ... and the corresponding data directory
                    self[subdir] = _dir

                    # now check if it is actually an exact match, if
                    # applicable
                    if return_if_match:
                        if ignorecase:
                            match = (name_or_pattern.lower()
                                     == subdir.lower())
                        else:
                            match = name_or_pattern == subdir
                        if match:
                            logger.info("Found match for ID {}"
                                        .format(name_or_pattern))
                            return _dir
        else:
            _msgs.append('directory %s does not exist\n' % search_dir)

    for msg in _msgs:
        logger.info(msg)
    for warning in _warnings:
        logger.warning(warning)

    if len(_candidates) == 0:
        raise DataSearchError('No matches could be found for search '
                              'pattern {}'.format(name_or_pattern))
    if return_if_match:
        if len(_candidates) == 1:
            logger.info("Found exactly one match for search pattern "
                        "{}: {}".format(name_or_pattern, _candidates[0]))
            return self[_candidates[0]]
        raise DataSearchError('Found multiple matches for search pattern '
                              '{}. Please choose from {}'
                              .format(name_or_pattern, _candidates))
    return _candidates

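# Illustration of the pattern matching used above (hypothetical ID;
# shell-style wildcards are translated to a regex via fnmatch):
#
#     import fnmatch, re
#     pattern = fnmatch.translate('AATSR*ORAC*v4*')
#     bool(re.search(pattern, 'AATSR_ORAC_v4.01', re.IGNORECASE))  # True
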
def update(self, **kwargs):
    for k, v in kwargs.items():
        try:
            self[k] = v
        except Exception:
            logger.warning("Invalid attribute: {}".format(k))

def _calc_od_helper(data, var_name, to_lambda, od_ref, lambda_ref,
                    od_ref_alt=None, lambda_ref_alt=None,
                    use_angstrom_coeff='ang4487aer'):
    """Helper method for computing ODs

    Parameters
    ----------
    data : dict-like
        data object containing loaded results used to compute the ODs at
        a new wavelength
    var_name : str
        name of variable that is supposed to be computed (is used in
        order to see whether a global lower threshold is defined for this
        variable and if this is the case, all computed values that are
        below this threshold are replaced with NaNs)
    to_lambda : float
        wavelength of computed AOD
    od_ref : :obj:`float` or :obj:`ndarray`
        reference AOD
    lambda_ref : :obj:`float` or :obj:`ndarray`
        wavelength corresponding to reference AOD
    od_ref_alt : :obj:`float` or :obj:`ndarray`, optional
        alternative reference AOD (is used for datapoints where the
        former is invalid)
    lambda_ref_alt : :obj:`float` or :obj:`ndarray`, optional
        wavelength corresponding to alternative reference AOD
    use_angstrom_coeff : str
        name of Angstrom coefficient in data that is used for the
        computation

    Returns
    -------
    :obj:`float` or :obj:`ndarray`
        AOD(s) at shifted wavelength

    Raises
    ------
    AttributeError
        if neither ``od_ref`` nor ``od_ref_alt`` are available in data,
        or if ``use_angstrom_coeff`` is missing
    """
    if od_ref not in data:
        logger.warning('Reference OD at {} nm is not available in data, '
                       'checking alternative'.format(lambda_ref))
        if od_ref_alt is None or od_ref_alt not in data:
            raise AttributeError('No alternative OD found for '
                                 'computation of {}'.format(var_name))
        return compute_od_from_angstromexp(
            to_lambda=to_lambda,
            od_ref=data[od_ref_alt],
            lambda_ref=lambda_ref_alt,
            angstrom_coeff=data[use_angstrom_coeff])
    elif use_angstrom_coeff not in data:
        raise AttributeError("Angstrom coefficient (440-870 nm) is not "
                             "available in provided data")
    result = compute_od_from_angstromexp(
        to_lambda=to_lambda,
        od_ref=data[od_ref],
        lambda_ref=lambda_ref,
        angstrom_coeff=data[use_angstrom_coeff])
    # optional, if available: fill up time steps that are nans with
    # values calculated from the alternative wavelength to minimise gaps
    # in the time series
    if od_ref_alt in data:
        mask = np.argwhere(np.isnan(result))
        if len(mask) > 0:  # there are nans
            ods_alt = data[od_ref_alt][mask]
            ang = data[use_angstrom_coeff][mask]
            replace = compute_od_from_angstromexp(
                to_lambda=to_lambda,
                od_ref=ods_alt,
                lambda_ref=lambda_ref_alt,
                angstrom_coeff=ang)
            result[mask] = replace
    try:
        # now replace all values with NaNs that are below the global
        # lower threshold
        below_thresh = result < const.VAR_PARAM[var_name]['minimum']
        result[below_thresh] = np.nan
    except Exception:
        logger.warning("Could not access lower limit from global "
                       "settings for variable {}".format(var_name))
    return result

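# The conversion implemented in compute_od_from_angstromexp follows the
# standard Angstrom exponent relation
#
#     tau(lambda) = tau(lambda_ref) * (lambda / lambda_ref) ** (-alpha)
#
# Worked example with illustrative numbers: od_ref = 0.2 at
# lambda_ref = 440 nm and alpha = 1.5 give, at to_lambda = 550 nm,
#
#     0.2 * (550 / 440) ** (-1.5)  # ~ 0.143
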
def _browse(self, name_or_pattern, ignorecase=True, return_if_match=True):
    """Search all Aerocom data directories that match input name or pattern

    Note
    ----
    Please do not use this function directly.

    Parameters
    ----------
    name_or_pattern : str
        name or pattern of data (can be model or obs data)
    ignorecase : bool
        if True, upper / lower case is ignored
    return_if_match : bool
        if True, then the data directory is returned as string, if it can
        be found, else, only a list is returned that contains all
        matches. The latter takes longer since the whole database is
        searched.

    Returns
    -------
    :obj:`str` or :obj:`list`
        Data directory (str, if ``return_if_match`` is True) or list
        containing valid Aerocom names (which can then be used to
        retrieve the paths)

    Raises
    ------
    DataSearchError
        if no match or no unique match can be found
    """
    pattern = fnmatch.translate(name_or_pattern)
    _candidates = []
    _msgs = []
    _warnings = []

    for obs_id in const.OBS_IDS:
        if ignorecase:
            match = name_or_pattern.lower() == obs_id.lower()
        else:
            match = name_or_pattern == obs_id
        if match:
            logger.info("Found match for search pattern in obs network "
                        "directories {}".format(obs_id))
            self[obs_id] = const.OBSCONFIG[obs_id]["PATH"]
            if return_if_match:
                return self[obs_id]
        else:
            if ignorecase:
                match = bool(re.search(pattern, obs_id, re.IGNORECASE))
            else:
                match = bool(re.search(pattern, obs_id))
            if match:
                self[obs_id] = const.OBSCONFIG[obs_id]["PATH"]
                _candidates.append(obs_id)

    for search_dir in const.MODELDIRS:
        # get the directories
        if isdir(search_dir):
            subdirs = [x for x in listdir(search_dir)
                       if isdir(join(search_dir, x))]
            for subdir in subdirs:
                if ignorecase:
                    match = bool(re.search(pattern, subdir,
                                           re.IGNORECASE))
                else:
                    match = bool(re.search(pattern, subdir))
                if match:
                    _dir = join(search_dir, subdir)
                    _rnsubdir = join(_dir, "renamed")
                    if isdir(_rnsubdir):
                        logger.info("{} has subdir renamed. Using that "
                                    "one".format(_dir))
                        _dir = _rnsubdir
                    # append name of candidate ...
                    _candidates.append(subdir)
                    # ... and the corresponding data directory
                    self[subdir] = _dir

                    # now check if it is actually an exact match, if
                    # applicable
                    if return_if_match:
                        if ignorecase:
                            match = (name_or_pattern.lower()
                                     == subdir.lower())
                        else:
                            match = name_or_pattern == subdir
                        if match:
                            logger.info("Found match for ID {}"
                                        .format(name_or_pattern))
                            return _dir
        else:
            _msgs.append('directory %s does not exist\n' % search_dir)

    for msg in _msgs:
        logger.info(msg)
    for warning in _warnings:
        logger.warning(warning)

    if len(_candidates) == 0:
        raise DataSearchError('No matches could be found for search '
                              'pattern {}'.format(name_or_pattern))
    if return_if_match:
        if len(_candidates) == 1:
            logger.info("Found exactly one match for search pattern "
                        "{}: {}".format(name_or_pattern, _candidates[0]))
            return self[_candidates[0]]
        raise DataSearchError('Found multiple matches for search pattern '
                              '{}. Please choose from {}'
                              .format(name_or_pattern, _candidates))
    return _candidates

def parse_from_ini(self, var_name=None, var_name_alt=None, cfg=None):
    """Import default settings for this variable

    Parameters
    ----------
    var_name : str
        variable name
    var_name_alt : str
        alternative variable name that is used if variable name is not
        available
    cfg : ConfigParser
        open config parser object

    Raises
    ------
    IOError
        if the variable configuration (ini) file does not exist
    """
    if cfg is None:
        cfg = self.read_config()
    var_info = {}
    if var_name is not None and var_name != 'DEFAULT':
        if var_name in cfg:
            logger.info("Found default configuration for variable "
                        "{}".format(var_name))
            var_info = cfg[var_name]
        elif isinstance(var_name_alt, str) and var_name_alt in cfg:
            var_info = cfg[var_name_alt]
        else:
            ap = parse_aliases_ini()
            aliases = _read_alias_ini(ap)
            if var_name in aliases:
                var_name = aliases[var_name]
                var_info = cfg[var_name]
            else:
                try:
                    var_name = _check_alias_family(var_name, ap)
                    var_info = cfg[var_name]
                except VariableDefinitionError:
                    logger.warning("No default configuration available "
                                   "for variable {}. Using DEFAULT "
                                   "settings".format(var_name))

    default = cfg['DEFAULT']

    for key in self.keys():
        if key in self.ALT_NAMES:
            if self.ALT_NAMES[key] in var_info:
                self._add(key, var_info[self.ALT_NAMES[key]])
        elif key in var_info:
            self._add(key, var_info[key])
        elif key in default:
            self._add(key, default[key])

    self.var_name = var_name

def upper_limit(self):
    """Old attribute name for :attr:`maximum` (following HTAP2 defs)"""
    logger.warning(DeprecationWarning('Old name for attribute maximum'))
    return self.maximum

def load_cube_custom(file, var_name=None, file_convention=None,
                     perform_fmt_checks=None):
    """Load netcdf file as iris.Cube

    Parameters
    ----------
    file : str
        netcdf file
    var_name : str
        name of variable to read
    file_convention : :obj:`FileConventionRead`, optional
        Aerocom file convention. If provided, then the data content (e.g.
        dimension definitions) is tested against definition in file name
    perform_fmt_checks : bool
        if True, additional quality checks (and corrections) are
        (attempted to be) performed.

    Returns
    -------
    iris.cube.Cube
        loaded data as Cube
    """
    if perform_fmt_checks is None:
        perform_fmt_checks = const.GRID_IO.PERFORM_FMT_CHECKS
    cube_list = iris.load(file)
    _num = len(cube_list)
    if _num != 1:
        if _num == 0:
            raise NetcdfError('Data from file {} could not be loaded '
                              'using iris'.format(file))
        else:
            logger.warning('File {} contains more than one '
                           'variable'.format(file))
    cube = None
    if var_name is None:
        if not len(cube_list) == 1:
            vars_avail = [c.var_name for c in cube_list]
            raise NetcdfError('Could not load single cube from {}. '
                              'Please specify var_name. Input file '
                              'contains the following variables: '
                              '{}'.format(file, vars_avail))
        cube = cube_list[0]
        var_name = cube.var_name
    else:
        for c in cube_list:
            if c.var_name == var_name:
                cube = c
                break
        if cube is None:
            raise NetcdfError('Variable {} not available in file '
                              '{}'.format(var_name, file))
    if perform_fmt_checks:
        try:
            cube = _check_var_unit_cube(cube)
        except VariableDefinitionError:
            pass

        grid_io = const.GRID_IO
        if grid_io.CHECK_TIME_FILENAME:
            try:
                cube = _check_correct_time_dim(cube, file,
                                               file_convention)
            except FileConventionError:
                const.print_log.warning('WARNING: failed to check / '
                                        'validate time dim. using '
                                        'information in filename. '
                                        'Reason: invalid file name '
                                        'convention')
        else:
            logger.warning('WARNING: Automatic check of time array in '
                           'netCDF files is deactivated. This may cause '
                           'problems in case the time dimension is not '
                           'CF conform.')
        if grid_io.CHECK_DIM_COORDS:
            cube = check_dim_coords_cube(cube)
        try:
            if grid_io.DEL_TIME_BOUNDS:
                cube.coord("time").bounds = None
        except Exception:
            logger.warning('Failed to access time coordinate in '
                           'GriddedData')
        if grid_io.SHIFT_LONS:
            cube = check_and_regrid_lons_cube(cube)
    return cube

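# Usage sketch (hypothetical file name following the AeroCom-3 naming
# convention; the convention is inferred from the name unless
# file_convention is passed explicitly):
#
#     cube = load_cube_custom(
#         'aerocom3_MyModel_od550aer_Column_2010_monthly.nc',
#         var_name='od550aer')
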
def read_file(self, nasa_ames_file, only_head=False,
              replace_invalid_nan=True, convert_timestamps=True,
              decode_flags=True, quality_check=True):
    """Read NASA Ames file

    Parameters
    ----------
    nasa_ames_file : str
        EBAS NASA Ames file
    only_head : bool
        read only file header
    replace_invalid_nan : bool
        replace all invalid values in the table by NaNs. The invalid
        values for each dependent data column are identified based on the
        information in the file header.
    convert_timestamps : bool
        compute array of numpy datetime64 timestamps from numeric
        timestamps in data
    decode_flags : bool
        if True, all flags in all flag columns are decoded from floating
        point representation to 3 integers, e.g.
        0.111222333 -> 111 222 333
    quality_check : bool
        perform quality check after import (for details see
        :func:`_quality_check`)
    """
    logger.info("Reading NASA Ames file:\n{}".format(nasa_ames_file))
    lc = 0  # line counter
    dc = 0  # data block line counter
    mc = 0  # meta block counter
    END_VAR_DEF = np.nan  # will be set (info stored in header)
    IN_DATA = False
    data = []
    _insert_invalid = None
    for line in open(nasa_ames_file):
        if IN_DATA:
            if dc == 0:
                logger.debug(line)
            try:
                data.append(tuple([float(x.strip())
                                   for x in line.strip().split()]))
            except Exception as e:
                data.append(_insert_invalid)
                logger.warning("Failed to read data row {}. "
                               "Error msg: {}".format(dc, repr(e)))
            dc += 1
        elif lc < self._NUM_FIXLINES:
            try:
                val = self._H_FIXLINES_CONV[lc](line)
                attr = self._H_FIXLINES_YIELD[lc]
                if isinstance(attr, list):
                    for i, attr_id in enumerate(attr):
                        self[attr_id] = val[i]
                else:
                    self[attr] = val
            except Exception as e:
                msg = ("Failed to read header row {}.\n{}\n"
                       "Error msg: {}".format(lc, line, repr(e)))
                if lc in self._HEAD_ROWS_MANDATORY:
                    raise NasaAmesReadError("Fatal: {}".format(msg))
                else:
                    logger.warning(msg)
        else:
            _flagmap_idx = 0
            if mc == 0:
                END_VAR_DEF = (self._NUM_FIXLINES
                               + self.num_cols_dependent - 1)
                NUM_HEAD_LINES = self.num_head_lines
                try:
                    self.var_defs.append(self._read_vardef_line(line))
                except Exception as e:
                    logger.warning(repr(e))
            elif lc < END_VAR_DEF:
                var = self._read_vardef_line(line)
                # if variable corresponds to flag column, assign this
                # flag column to all previously read variables
                if var.is_flag:
                    for _var in self.var_defs[_flagmap_idx:]:
                        _var.flag_id = var.name
                self.var_defs.append(var)
                _flagmap_idx = len(self.var_defs)
            elif lc == NUM_HEAD_LINES - 1:
                IN_DATA = True
                self._data_header = h = [x.strip() for x in line.split()]
                # append information of first two columns to variable
                # definition array
                self._var_defs.insert(0, EbasColDef(name=h[0],
                                                    is_flag=False,
                                                    is_var=False,
                                                    unit=self.time_unit))
                self._var_defs.insert(1, EbasColDef(name=h[1],
                                                    is_flag=False,
                                                    is_var=False,
                                                    unit=self.time_unit))
                if only_head:
                    return
                logger.debug("REACHED DATA BLOCK")
                _insert_invalid = tuple([np.nan] * self.col_num)
            elif lc >= END_VAR_DEF + 2:
                try:
                    name, val = line.split(":")
                    key = name.strip().lower().replace(" ", "_")
                    self.meta[key] = val.strip()
                except Exception as e:
                    logger.warning("Failed to read line no. {}.\n{}\n"
                                   "Error msg: {}\n".format(lc, line,
                                                            repr(e)))
            else:
                logger.debug("Ignoring line no. {}: {}".format(lc, line))
            mc += 1
        lc += 1

    data = np.asarray(data)

    data[:, 1:] = data[:, 1:] * np.asarray(self.mul_factors)

    self._data = data
    if replace_invalid_nan:
        dep_dat = data[:, 1:]
        for i, val in enumerate(np.floor(self.vals_invalid)):
            try:
                col = dep_dat[:, i]
                cond = np.floor(col) == val
                col[cond] = np.nan
                dep_dat[:, i] = col
            except Exception:
                logger.warning("Failed to replace invalid values with "
                               "NaNs in column {}"
                               .format(self.col_names[i + 1]))
        data[:, 1:] = dep_dat
    self._data = data

    if convert_timestamps:
        try:
            self.compute_time_stamps()
        except Exception as e:
            logger.warning("Failed to compute time stamps.\n"
                           "Error message: {}".format(repr(e)))
    self.init_flags(decode_flags)

    if quality_check:
        self._quality_check()

def parse_from_ini(self, var_name=None, cfg=None):
    """Import default settings for this variable

    Parameters
    ----------
    var_name : str
        variable name (must be specified as section in the corresponding
        ini file)
    cfg : ConfigParser
        open and read config parser object

    Raises
    ------
    IOError
        if the configuration (ini) file does not exist
    """
    if cfg is None:
        cfg = self.read_config()
    var_info = {}
    if var_name is not None and var_name != 'DEFAULT':
        if var_name in cfg:
            logger.info("Found default configuration for variable "
                        "{}".format(var_name))
            var_info = cfg[var_name]
            self.var_name = var_name
        else:
            aliases = _read_alias_ini()
            if var_name in aliases:
                var_info = cfg[aliases[var_name]]
            else:
                logger.warning("No default configuration available for "
                               "variable {}. Using DEFAULT "
                               "settings".format(var_name))

    default = cfg['DEFAULT']

    for key in self.keys():
        ok = True
        if key in var_info:
            val = var_info[key]
        elif key in default:
            val = default[key]
        else:
            ok = False
        if ok:
            if key in self._TYPE_CONV:
                try:
                    val = self._TYPE_CONV[key](val)
                except Exception:
                    pass
            elif key == 'unit':
                if val == 'None' or val == '1':
                    val = 1
            if val == 'None':
                val = None
            self[key] = val

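# Illustration of the lookup order with hypothetical ini content: values
# from the variable section take precedence over the [DEFAULT] section,
# key by key:
#
#     [DEFAULT]
#     unit = 1
#     minimum = -9e30
#
#     [od550aer]
#     minimum = 0
#
# parse_from_ini(var_name='od550aer') would then use minimum=0 from the
# section and fall back to [DEFAULT] for all other keys.
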