Example #1
 def get_meta(self, force_single_value=True, quality_check=True):
     """Return meta-data as dictionary
     
     Parameters
     ----------
     force_single_value : bool
         if True, then each meta value that is a list or array is converted
         to a single value
     quality_check : bool
         if True, and a meta value is a list or array, then the entries are
         checked for consistency and a MetaDataError is raised if they
         differ between individual time-stamps
     
     Returns
     -------
     dict
         dictionary containing the retrieved meta-data
         
     Raises
     ------
     AttributeError
         if one of the meta entries is invalid
     MetaDataError
         in case of inconsistencies in meta data between individual
         time-stamps
     """
     meta = {}
     for key in self.META_KEYS:
         val = self[key]
         if force_single_value and not isinstance(val, str):
             if not isinstance(val, (list, np.ndarray)):
                 raise AttributeError("Invalid value encountered for meta "
                                      "key {}, need str, list or ndarray, "
                                      "got {}".format(key, type(val)))
             if quality_check:
                 logger.debug("Performing quality check for meta data")
                 if not all(x == val[0] for x in val):
                     raise MetaDataError(
                         "Inconsistencies in meta parameter {} "
                         "between different time-stamps".format(key))
             val = val[0]
         meta[key] = val
     return meta
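
A minimal usage sketch for the method above (hypothetical: it assumes a
dict-like StationData container that defines META_KEYS and this get_meta
method; the key and values shown are purely illustrative):

    stat = StationData()                             # hypothetical container
    stat['station_name'] = ['Zeppelin', 'Zeppelin']  # consistent over time stamps
    meta = stat.get_meta()  # -> {'station_name': 'Zeppelin', ...}
    # with quality_check=True, a MetaDataError would be raised instead if
    # the list entries differed between individual time-stamps
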
Example #2
 def check_and_load(self):
     if not os.path.isfile(self.file_path):
         logger.info('No cache file available for query of dataset '
                     '{}'.format(self.dataset_to_read))
         return False
     
     delete_existing = False
     in_handle = open(self.file_path, 'rb')
     # read meta information about file
     if self.connection_established:
         try:
             use_cache_file = self._check_pkl_head_vs_database(in_handle)
         except Exception as e:
             use_cache_file = False
             delete_existing = True
             logger.exception('File error in cached data file {}. File will '
                              'be removed and data reloaded. '
                              'Error: {}'.format(self.file_path, repr(e)))
         if not use_cache_file:
             # TODO: Should we delete the cache file if it is outdated ???
             logger.info('Aborting reading cache file {}. Aerocom database '
                         'has changed compared to cached version'
                         .format(self.file_name))
             in_handle.close()
             if delete_existing:  # something was wrong with the cached file
                 os.remove(self.file_path)
             return False
     else:
         # no database connection: skip the cache-file header without
         # validating it against the database
         for _ in range(self.LEN_CACHE_HEAD):
             logger.debug(pickle.load(in_handle))
     # everything is okay: load the data object and close the file handle
     data = pickle.load(in_handle)
     in_handle.close()
     if not isinstance(data, UngriddedData):
         raise TypeError('Unexpected data type stored in cache file, need '
                         'instance of UngriddedData, got {}'.format(type(data)))
     self.loaded_data = data
     logger.info('Successfully loaded data for {} from cache'
                 .format(self.dataset_to_read))
     return True
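
A sketch of the intended call pattern (hypothetical driver code: the
constructor arguments and the fallback reader are assumptions, not part of
the snippet above):

    handler = CacheHandlerUngridded(dataset_to_read='EBASMC')  # assumed setup
    if handler.check_and_load():
        data = handler.loaded_data    # UngriddedData restored from cache
    else:
        data = read_from_database()   # hypothetical fallback that re-reads
                                      # the dataset and refills the cache
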
Example #3
 def get_station_coords(self, force_single_value=True, quality_check=True):
     """Return coordinates as dictionary
     
     Parameters
     ----------
     force_single_value : bool
         if True and coordinate values are lists or arrays, then they are
         collapsed to a single value using the mean
     quality_check : bool
         if True, and coordinate values are lists or arrays, then the
         standard deviation of the values is compared to the upper limits
         allowed for local variation. The upper limits are specified
         in attr. ``COORD_MAX_VAR``.
     
     Returns
     -------
     dict
         dictionary containing the retrieved coordinates
         
     Raises
     ------
     AttributeError
         if one of the coordinate values is invalid
     CoordinateError
         if the local variation in any of the three spatial coordinates
         exceeds the corresponding upper limit
     """
     _check_var = False
     vals, stds = {}, {}
     for key in self.COORD_KEYS:
         val = self[key]
         std = 0.0
         if force_single_value and not isinstance(val,
                                                  (float, np.floating)):
             if isinstance(val, (int, np.integer)):
                 val = np.float64(val)
             elif isinstance(val, (list, np.ndarray)):
                 # compute the spread before collapsing to the mean (the
                 # standard deviation of the collapsed scalar would always
                 # be 0, so the quality check below could never trigger)
                 std = np.std(val)
                 val = np.mean(val)
                 if std > 0:
                     _check_var = True
             else:
                 raise AttributeError(
                     "Invalid value encountered for coord "
                     "{}, need float, int, list or ndarray, "
                     "got {}".format(key, type(val)))
         vals[key] = val
         stds[key] = std
     if _check_var:
         logger.debug("Performing quality check for coordinates")
         lat, dlat, dlon, dalt = (vals['stat_lat'], stds['stat_lat'],
                                  stds['stat_lon'], stds['stat_alt'])
         lat_len = 111e3  #approximate length of latitude degree in m
         if self.COORD_MAX_VAR['stat_lat'] < lat_len * dlat:
             raise CoordinateError("Variation in station latitude is "
                                   "exceeding upper limit of {} m".format(
                                       self.COORD_MAX_VAR['stat_lat']))
         elif self.COORD_MAX_VAR['stat_lon'] < (
                 lat_len * np.cos(np.deg2rad(lat)) * dlon):
             raise CoordinateError("Variation in station longitude is "
                                   "exceeding upper limit of {} m".format(
                                       self.COORD_MAX_VAR['stat_lon']))
         elif self.COORD_MAX_VAR['stat_alt'] < dalt:
             raise CoordinateError("Variation in station altitude is "
                                   "exceeding upper limit of {} m".format(
                                       self.COORD_MAX_VAR['stat_alt']))
     return vals
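
The quality check converts the coordinate spread from degrees to metres: one
degree of latitude is roughly 111 km everywhere, while one degree of longitude
shrinks with the cosine of the latitude. A standalone sketch of that
conversion (illustrative only, not part of the class above):

    import numpy as np

    LAT_DEGREE_M = 111e3  # approximate length of one latitude degree in metres

    def coord_spread_m(lats, lons):
        """Return (dlat_m, dlon_m): spread of coordinate samples in metres."""
        dlat_m = LAT_DEGREE_M * np.std(lats)
        # longitude degrees get shorter towards the poles
        dlon_m = LAT_DEGREE_M * np.cos(np.deg2rad(np.mean(lats))) * np.std(lons)
        return dlat_m, dlon_m

    # e.g. samples scattered around 78.9 deg N (roughly the Zeppelin station)
    print(coord_spread_m([78.90, 78.91], [11.88, 11.89]))
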
Example #4
    def read_file(self,
                  nasa_ames_file,
                  only_head=False,
                  replace_invalid_nan=True,
                  convert_timestamps=True,
                  decode_flags=True,
                  quality_check=True):
        """Read NASA Ames file
        
        Parameters
        ----------
        nasa_ames_file : str
            EBAS NASA Ames file
        only_head : bool
            read only file header
        replace_invalid_nan : bool
            replace all invalid values in the table with NaNs. The invalid
            values for each dependent data column are identified based on the
            information in the file header.
        convert_timestamps : bool
            compute array of numpy datetime64 timestamps from numeric timestamps
            in data
        decode_flags : bool
            if True, all flags in all flag columns are decoded from floating 
            point representation to 3 integers, e.g. 
            0.111222333 -> 111 222 333
        quality_check : bool
            perform quality check after import (for details see 
            :func:`_quality_check`)
        """
        logger.info("Reading NASA Ames file:\n{}".format(nasa_ames_file))
        lc = 0  # line counter
        dc = 0  # data block line counter
        mc = 0  # meta block counter
        _flagmap_idx = 0  # first variable without an assigned flag column
        END_VAR_DEF = np.nan  # will be set (info stored in header)
        IN_DATA = False
        data = []
        _insert_invalid = None
        with open(nasa_ames_file) as fio:
            lines = fio.readlines()
        for line in lines:
            if IN_DATA:
                if dc == 0:
                    logger.debug(line)
                try:
                    data.append(
                        tuple([float(x.strip())
                               for x in line.strip().split()]))
                except Exception as e:
                    data.append(_insert_invalid)
                    logger.warning("Failed to read data row {}. "
                                   "Error msg: {}".format(dc, repr(e)))
                dc += 1
            elif lc < self._NUM_FIXLINES:
                try:
                    val = self._H_FIXLINES_CONV[lc](line)
                    attr = self._H_FIXLINES_YIELD[lc]
                    if isinstance(attr, list):
                        for i, attr_id in enumerate(attr):
                            self[attr_id] = val[i]
                    else:
                        self[attr] = val
                except Exception as e:
                    msg = ("Failed to read header row {}.\n{}\n"
                           "Error msg: {}".format(lc, line, repr(e)))
                    if lc in self._HEAD_ROWS_MANDATORY:
                        raise NasaAmesReadError("Fatal: {}".format(msg))
                    else:
                        logger.warning(msg)
            else:
                if mc == 0:
                    END_VAR_DEF = self._NUM_FIXLINES + self.num_cols_dependent - 1
                    NUM_HEAD_LINES = self.num_head_lines
                    try:
                        self.var_defs.append(self._read_vardef_line(line))
                    except Exception as e:
                        logger.warning(repr(e))

                elif lc < END_VAR_DEF:
                    try:
                        var = self._read_vardef_line(line)
                        # if variable corresponds to flag column, assign this
                        # flag column to all previously read variables that
                        # have no flag column assigned yet
                        if var.is_flag:
                            for _var in self.var_defs[_flagmap_idx:]:
                                _var.flag_id = var.name
                        self.var_defs.append(var)
                        if var.is_flag:
                            _flagmap_idx = len(self.var_defs)
                    except Exception as e:
                        logger.warning(repr(e))

                elif lc == NUM_HEAD_LINES - 1:
                    IN_DATA = True
                    self._data_header = h = [x.strip() for x in line.split()]
                    #append information of first two columns to variable
                    #definition array.
                    self._var_defs.insert(
                        0,
                        EbasColDef(name=h[0],
                                   is_flag=False,
                                   is_var=False,
                                   unit=self.time_unit))
                    self._var_defs.insert(
                        1,
                        EbasColDef(name=h[1],
                                   is_flag=False,
                                   is_var=False,
                                   unit=self.time_unit))
                    if only_head:
                        return
                    logger.debug("REACHED DATA BLOCK")
                    _insert_invalid = tuple([np.nan] * self.col_num)

                elif lc >= END_VAR_DEF + 2:
                    try:
                        name, val = line.split(":")
                        key = name.strip().lower().replace(" ", "_")
                        self.meta[key] = val.strip()
                    except Exception as e:
                        logger.warning("Failed to read line no. {}.\n{}\n"
                                       "Error msg: {}\n".format(
                                           lc, line, repr(e)))
                else:
                    logger.debug("Ignoring line no. {}: {}".format(lc, line))
                mc += 1
            lc += 1

        data = np.asarray(data)

        # apply the column scaling factors specified in the file header
        data[:, 1:] = data[:, 1:] * np.asarray(self.mul_factors)

        if replace_invalid_nan:
            dep_dat = data[:, 1:]
            for i, val in enumerate(np.floor(self.vals_invalid)):
                try:
                    col = dep_dat[:, i]
                    cond = np.floor(col) == val
                    col[cond] = np.nan
                    dep_dat[:, i] = col
                except Exception:
                    logger.warning("Failed to replace invalid values with "
                                   "NaNs in column {}".format(
                                       self.col_names[i + 1]))
            data[:, 1:] = dep_dat
        self._data = data

        if convert_timestamps:
            try:
                self.compute_time_stamps()
            except Exception as e:
                logger.warning("Failed to compute time stamps.\n"
                               "Error message: {}".format(repr(e)))
        self.init_flags(decode_flags)
        if quality_check:
            self._quality_check()
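
The decode_flags option splits each floating point flag value into three
integer flags, e.g. 0.111222333 -> 111, 222, 333. A standalone sketch of that
decoding (illustrative only; the library's own implementation may differ):

    import numpy as np

    def decode_flag_column(flags):
        """Split EBAS flag floats like 0.111222333 into three integer flags."""
        # scale to a 9-digit integer, then carve out three 3-digit groups
        as_int = np.round(np.asarray(flags) * 1e9).astype(np.int64)
        first = as_int // 1000000
        second = (as_int // 1000) % 1000
        third = as_int % 1000
        return np.column_stack([first, second, third])

    print(decode_flag_column([0.111222333]))  # -> [[111 222 333]]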