def get_meta(self, force_single_value=True, quality_check=True):
    """Return meta-data as dictionary

    Parameters
    ----------
    force_single_value : bool
        if True, then each meta value that is a list or array is collapsed
        to a single value.
    quality_check : bool
        if True, and a meta value is a list or array, it is checked that
        all entries are identical across the individual time-stamps.

    Returns
    -------
    dict
        dictionary containing the retrieved meta-data

    Raises
    ------
    AttributeError
        if one of the meta entries is invalid
    MetaDataError
        in case of inconsistencies in meta data between individual
        time-stamps
    """
    meta = {}
    for key in self.META_KEYS:
        val = self[key]
        if force_single_value and not isinstance(val, str):
            if not isinstance(val, (list, np.ndarray)):
                raise AttributeError("Invalid value encountered for meta "
                                     "key {}, need str, list or ndarray, "
                                     "got {}".format(key, type(val)))
            if quality_check and not all([x == val[0] for x in val]):
                logger.debug("Performing quality check for meta data")
                raise MetaDataError(
                    "Inconsistencies in meta parameter {} "
                    "between different time-stamps".format(key))
            val = val[0]
        meta[key] = val
    return meta
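# Illustrative usage (hedged sketch): assuming an instance ``stat`` of the
# surrounding station-data class, with the META_KEYS entries filled per
# time-stamp, the collapsed meta-data could be retrieved like this. The
# object name ``stat`` and the key 'station_name' are assumptions for
# illustration only.
#
#     >>> meta = stat.get_meta(force_single_value=True, quality_check=True)
#     >>> meta['station_name']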
def check_and_load(self):
    if not os.path.isfile(self.file_path):
        logger.info('No cache file available for query of dataset '
                    '{}'.format(self.dataset_to_read))
        return False

    delete_existing = False
    in_handle = open(self.file_path, 'rb')
    # read meta information about file; without a database connection the
    # cache header cannot be validated
    use_cache_file = False
    if self.connection_established:
        try:
            use_cache_file = self._check_pkl_head_vs_database(in_handle)
        except Exception as e:
            use_cache_file = False
            delete_existing = True
            logger.exception('File error in cached data file {}. File will '
                             'be removed and data reloaded. '
                             'Error: {}'.format(self.file_path, repr(e)))
    if not use_cache_file:
        # TODO: Should we delete the cache file if it is outdated ???
        logger.info('Aborting reading cache file {}. Aerocom database '
                    'has changed compared to cached version'
                    .format(self.file_name))
        in_handle.close()
        if delete_existing: # something was wrong
            os.remove(self.file_path)
        return False
    else:
        for k in range(self.LEN_CACHE_HEAD):
            logger.debug(pickle.load(in_handle))

    # everything is okay
    data = pickle.load(in_handle)
    in_handle.close()
    if not isinstance(data, UngriddedData):
        raise TypeError('Unexpected data type stored in cache file, need '
                        'instance of UngriddedData, got {}'
                        .format(type(data)))
    self.loaded_data = data
    logger.info('Successfully loaded data for {} from cache'
                .format(self.dataset_to_read))
    return True
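# Illustrative usage (hedged sketch): assuming an instance ``cache`` of the
# surrounding cache-handler class with ``file_path``, ``dataset_to_read``
# and ``connection_established`` set, the loaded data is exposed via the
# ``loaded_data`` attribute. The variable name ``cache`` is an assumption
# for illustration only.
#
#     >>> if cache.check_and_load():
#     ...     data = cache.loaded_data  # instance of UngriddedData
#     ... else:
#     ...     pass  # cache missing or outdated -> re-read from source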
def get_station_coords(self, force_single_value=True, quality_check=True):
    """Return coordinates as dictionary

    Parameters
    ----------
    force_single_value : bool
        if True and coordinate values are lists or arrays, they are
        collapsed to a single value using the mean
    quality_check : bool
        if True, and coordinate values are lists or arrays, then the
        standard deviation in the values is compared to the upper limits
        allowed for the local variation. The upper limits are specified
        in attr. ``COORD_MAX_VAR``.

    Returns
    -------
    dict
        dictionary containing the retrieved coordinates

    Raises
    ------
    AttributeError
        if one of the coordinate values is invalid
    CoordinateError
        if the local variation in any of the three spatial coordinates
        is too large
    """
    _check_var = False
    vals, stds = {}, {}
    for key in self.COORD_KEYS:
        val = self[key]
        std = 0.0
        if force_single_value and not isinstance(val, (float, np.floating)):
            if isinstance(val, (int, np.integer)):
                val = np.float64(val)
            elif isinstance(val, (list, np.ndarray)):
                # compute spread before collapsing to the mean
                std = np.std(val)
                val = np.mean(val)
                if std > 0:
                    _check_var = True
            else:
                raise AttributeError("Invalid value encountered for coord "
                                     "{}, need float, int, list or ndarray, "
                                     "got {}".format(key, type(val)))
        vals[key] = val
        stds[key] = std

    if _check_var and quality_check:
        logger.debug("Performing quality check for coordinates")
        lat, dlat, dlon, dalt = (vals['stat_lat'],
                                 stds['stat_lat'],
                                 stds['stat_lon'],
                                 stds['stat_alt'])
        lat_len = 111e3 # approximate length of one latitude degree in m
        if self.COORD_MAX_VAR['stat_lat'] < lat_len * dlat:
            raise CoordinateError("Variation in station latitude exceeds "
                                  "upper limit of {} m".format(
                                      self.COORD_MAX_VAR['stat_lat']))
        elif self.COORD_MAX_VAR['stat_lon'] < (lat_len *
                                               np.cos(np.deg2rad(lat)) *
                                               dlon):
            raise CoordinateError("Variation in station longitude exceeds "
                                  "upper limit of {} m".format(
                                      self.COORD_MAX_VAR['stat_lon']))
        elif self.COORD_MAX_VAR['stat_alt'] < dalt:
            raise CoordinateError("Variation in station altitude exceeds "
                                  "upper limit of {} m".format(
                                      self.COORD_MAX_VAR['stat_alt']))
    return vals
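# Worked illustration (hedged): the quality check converts the spread of the
# latitude values (in degrees) to metres using ~111 km per degree, i.e. a
# standard deviation of 0.001 deg corresponds to roughly 111e3 * 0.001 = 111 m,
# which is compared against COORD_MAX_VAR['stat_lat']; the longitude spread is
# additionally scaled by cos(latitude). Example call (the object name ``stat``
# is an assumption for illustration only):
#
#     >>> coords = stat.get_station_coords(force_single_value=True)
#     >>> coords['stat_lat'], coords['stat_lon'], coords['stat_alt']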
def read_file(self, nasa_ames_file, only_head=False, replace_invalid_nan=True,
              convert_timestamps=True, decode_flags=True,
              quality_check=True):
    """Read NASA Ames file

    Parameters
    ----------
    nasa_ames_file : str
        EBAS NASA Ames file
    only_head : bool
        read only file header
    replace_invalid_nan : bool
        replace all invalid values in the table by NaNs. The invalid values
        for each dependent data column are identified based on the
        information in the file header.
    convert_timestamps : bool
        compute array of numpy datetime64 timestamps from numeric
        timestamps in data
    decode_flags : bool
        if True, all flags in all flag columns are decoded from floating
        point representation to 3 integers, e.g. 0.111222333 -> 111 222 333
    quality_check : bool
        perform quality check after import (for details see
        :func:`_quality_check`)
    """
    logger.info("Reading NASA Ames file:\n{}".format(nasa_ames_file))
    lc = 0 # line counter
    dc = 0 # data block line counter
    mc = 0 # meta block counter
    END_VAR_DEF = np.nan # will be set (info stored in header)
    NUM_HEAD_LINES = np.nan # will be set (info stored in header)
    IN_DATA = False
    data = []
    _insert_invalid = None
    _flagmap_idx = 0 # index of first variable without assigned flag column
    with open(nasa_ames_file) as fobj:
        for line in fobj:
            if IN_DATA: # data block (after header)
                if dc == 0:
                    logger.debug(line)
                try:
                    data.append(tuple([float(x.strip())
                                       for x in line.strip().split()]))
                except Exception as e:
                    data.append(_insert_invalid)
                    logger.warning("Failed to read data row {}. "
                                   "Error msg: {}".format(dc, repr(e)))
                dc += 1
            elif lc < self._NUM_FIXLINES: # fix part of the header
                try:
                    val = self._H_FIXLINES_CONV[lc](line)
                    attr = self._H_FIXLINES_YIELD[lc]
                    if isinstance(attr, list):
                        for i, attr_id in enumerate(attr):
                            self[attr_id] = val[i]
                    else:
                        self[attr] = val
                except Exception as e:
                    msg = ("Failed to read header row {}.\n{}\n"
                           "Error msg: {}".format(lc, line, repr(e)))
                    if lc in self._HEAD_ROWS_MANDATORY:
                        raise NasaAmesReadError("Fatal: {}".format(msg))
                    else:
                        logger.warning(msg)
            else: # variable part of the header
                if mc == 0: # first line of variable definition block
                    END_VAR_DEF = (self._NUM_FIXLINES +
                                   self.num_cols_dependent - 1)
                    NUM_HEAD_LINES = self.num_head_lines
                    try:
                        self.var_defs.append(self._read_vardef_line(line))
                    except Exception as e:
                        logger.warning(repr(e))
                elif lc < END_VAR_DEF:
                    try:
                        var = self._read_vardef_line(line)
                        # if variable corresponds to a flag column, assign
                        # this flag column to all previously read variables
                        # that do not have a flag column yet
                        if var.is_flag:
                            for _var in self.var_defs[_flagmap_idx:]:
                                _var.flag_id = var.name
                        self.var_defs.append(var)
                        if var.is_flag:
                            _flagmap_idx = len(self.var_defs)
                    except Exception as e:
                        logger.warning(repr(e))
                elif lc == NUM_HEAD_LINES - 1: # column header line
                    IN_DATA = True
                    self._data_header = h = [x.strip() for x in line.split()]
                    # prepend information of the first two (time) columns
                    # to the variable definition array
                    self._var_defs.insert(0, EbasColDef(name=h[0],
                                                        is_flag=False,
                                                        is_var=False,
                                                        unit=self.time_unit))
                    self._var_defs.insert(1, EbasColDef(name=h[1],
                                                        is_flag=False,
                                                        is_var=False,
                                                        unit=self.time_unit))
                    if only_head:
                        return
                    logger.debug("REACHED DATA BLOCK")
                    _insert_invalid = tuple([np.nan] * self.col_num)
                elif lc >= END_VAR_DEF + 2: # additional meta info block
                    try:
                        name, val = line.split(":")
                        key = name.strip().lower().replace(" ", "_")
                        self.meta[key] = val.strip()
                    except Exception as e:
                        logger.warning("Failed to read line no. {}.\n{}\n"
                                       "Error msg: {}\n".format(lc, line,
                                                                repr(e)))
                else:
                    logger.debug("Ignoring line no. {}: {}".format(lc, line))
                mc += 1
            lc += 1

    data = np.asarray(data)
    data[:, 1:] = data[:, 1:] * np.asarray(self.mul_factors)
    self._data = data
    if replace_invalid_nan:
        dep_dat = data[:, 1:]
        for i, val in enumerate(np.floor(self.vals_invalid)):
            try:
                col = dep_dat[:, i]
                cond = np.floor(col) == val
                col[cond] = np.nan
                dep_dat[:, i] = col
            except Exception:
                logger.warning("Failed to replace invalid values with "
                               "NaNs in column {}".format(
                                   self.col_names[i + 1]))
        data[:, 1:] = dep_dat
        self._data = data
    if convert_timestamps:
        try:
            self.compute_time_stamps()
        except Exception as e:
            logger.warning("Failed to compute time stamps.\n"
                           "Error message: {}".format(repr(e)))
    self.init_flags(decode_flags)
    if quality_check:
        self._quality_check()