def browse_database(model_or_obs, verbose=False):
    """Browse the Aerocom database using a model or obs ID (or wildcard).

    Searches the database for matches of the input pattern and prints a
    summary (available variables, years, etc.) for every match found.

    Parameters
    ----------
    model_or_obs : str
        model or obs ID or search pattern
    verbose : bool
        if True, verbosity level will be set to debug, else to critical

    Returns
    -------
    list
        list with data_ids of all matches (``None`` if nothing was found
        or if more than 20 IDs match the pattern)

    Example
    -------
    >>> import pyaerocom as pya
    >>> pya.io.browse_database('AATSR*ORAC*v4*')
    """
    change_verbosity('debug' if verbose else 'critical')

    matches = AerocomBrowser().find_matches(model_or_obs)

    if not matches:
        print('No match could be found for {}'.format(model_or_obs))
        return
    if len(matches) > 20:
        print('Found more than 20 matches for input pattern {}:\n\n'
              'Matches: {}\n\n'
              'To receive more detailed information, please specify search ID '
              'more accurately'.format(model_or_obs, matches))
        return

    for match in matches:
        try:
            # ungridded observation networks need the dedicated reader
            reader_cls = (ReadUngridded if match in const.OBS_IDS_UNGRIDDED
                          else ReadGridded)
            print(reader_cls(match))
        except Exception as e:
            print('Reading failed for {}. Error: {}'.format(match, repr(e)))
    return matches
        # NOTE(review): closing statement of a function whose definition
        # starts before this chunk; this indentation level is inferred --
        # confirm against the full file.
        f.write('\n')


if __name__ == "__main__":
    # Start from a clean slate: remove previously written statistics output
    if os.path.exists(OUT_STATS):
        os.remove(OUT_STATS)
    plt.close('all')
    helpers.print_file(MODEL_INFO_FILE)

    ### OPTIONS
    # integer flags used as booleans to switch parts of the script on/off
    RUN_EVAL = 1
    RELOAD = 1
    TEST_FIRST = 0
    PLOT_STATIONS = 0

    # suppress pyaerocom log output during the run
    pya.change_verbosity('critical')
    if RUN_EVAL:
        ### DATA IMPORT
        if RELOAD:
            print('Importing model and obs data, this could take some time')
            ### Read gridded model data
            read_models = pya.io.ReadGriddedMulti(MODEL_LIST)
            read_models.read_individual_years(VARS, YEARS)

            ### Read gridded obs data
            read_gridded_obs = pya.io.ReadGriddedMulti(GRIDDED_OBS_NETWORKS)
            read_gridded_obs.read_individual_years(VARS, YEARS)

            read_ungridded_obs = pya.io.ReadUngridded()
            read_ungridded_obs.logger.setLevel(logging.INFO)
            # Load networks individually for now (easier for analysis below)
# Availability flag for the optional reverse_geocode dependency; hard-coded
# to False here, so the rg_unavail marker currently always skips.
rg_avail = False
rg_unavail = pytest.mark.skipif(not rg_avail, reason='Skipping tests that require access to reverse_geocode')
etopo1_unavail = pytest.mark.skipif(not const.ETOPO1_AVAILABLE, reason='Skipping tests that require access to ETOPO1 data')
# skipif(True==True) is a deliberately unconditional skip marker
always_skipped = pytest.mark.skipif(True==True, reason='Seek the answer')
testdata_unavail = pytest.mark.skipif(not TESTDATA_AVAIL, reason='Skipping tests that require testdata-minimal.')
test_not_working = pytest.mark.skip(reason='Method raises Exception')

from pyaerocom import change_verbosity
# silence the pyaerocom print log during test collection and runs
change_verbosity('critical', const.print_log)

### Fixtures representing data

# Paths to EMEP data
@pytest.fixture(scope='session')
def path_emep():
    # Returns a dict mapping temporal resolution ('daily', 'monthly',
    # 'yearly') to the path (str) of the corresponding EMEP test file,
    # plus 'data_dir' pointing to the containing directory.
    paths = {}
    emep_path= TESTDATADIR.joinpath(CHECK_PATHS['emep'])
    paths['daily'] = str(emep_path.joinpath('Base_day.nc'))
    paths['monthly'] = str(emep_path.joinpath('Base_month.nc'))
    paths['yearly'] = str(emep_path.joinpath('Base_fullrun.nc'))
    paths['data_dir'] = str(emep_path)
    return paths

# Example GriddedData object (TM5 model)
# NOTE(review): the fixture belonging to this decorator continues beyond
# this chunk
@pytest.fixture(scope='session')
            # NOTE(review): interior of a read loop inside a method whose
            # definition starts before this chunk; the indentation levels of
            # these leading lines are inferred -- confirm against full file.
            if not var in data_obj.var_idx:
                data_obj.var_idx[var] = var_idx
            metadata[meta_key]['variables'] = vars_avail
            idx += totnum
            # incremented by 1. (float literal) -- meta keys appear to be
            # floats in the data object; TODO confirm
            meta_key = meta_key + 1.
        # shorten data_obj._data to the right number of points
        data_obj._data = data_obj._data[:idx]
        data_obj = data_obj.merge_common_meta()
        data_obj.data_revision[self.DATASET_NAME] = self.data_revision
        self.data = data_obj
        return data_obj


if __name__ == "__main__":
    from pyaerocom import change_verbosity
    change_verbosity('critical')

    reader = ReadEbas()
    vars_to_retrieve = ['absc550aer']
    files = reader.get_file_list(vars_to_retrieve)
    # quick check: only read the first 10 files
    data = reader.read(vars_to_retrieve, last_file=10)
    stat_data = data.to_station_data(0)
    print(stat_data)
    print(data)
    idx, meta = data._find_common_meta()
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import scipy.stats as stats
import simplejson as json
import numpy as np
import datetime
import copy
import sys
import pwlf
import seaborn as sns
import pickle
import os

# output directory for results (relative path -- presumably relative to the
# location this script is run from; verify before reuse)
path_out = '../../aerosoltrends/data/test/'

#pya.change_verbosity('error')
# silence both pyaerocom loggers
pya.change_verbosity('critical', pya.const.logger)
pya.change_verbosity('critical', pya.const.print_log)


def get_params():
    """Return the dictionary of trend-computation parameters.

    NOTE(review): this function continues beyond this chunk; only the
    beginning of the params dict is visible here.
    """
    # computation parameters
    params = {
        'min_dobs': 300,  # minimum number of daily observations available in order to keep the station
        'min_ntrend': 7,  # minimum number of points used to compute a trend
        'min_nstat': 2,  # minimum number of stations required to compute median
        'sig': 0.95,  # significance
        'min_dim': 5,  # minimum number of days required to compute monthly mean
        'min_mis': 1,  # minimum number of months required to compute seasonal mean
        'min_siy': 4,  # minimum number of seasons required to compute annual mean
        'nseg': 2,  # number of segments if no significant linear trend on the time series is found
        # if use same segments for model and bias than the ones found in obs (to be run before)
        'use_obs_seg': True,
# template for plot names; {} is filled with a temporal resolution string
_PLOTNAME_BASESTR = 'mALLYEAR{}'

# temporal resolutions supported by pyaerocom grid I/O settings
TS_TYPES = pya.const.GRID_IO.TS_TYPES


def start_stop_from_year(year):
    """Convert a year into (start, stop) pandas Timestamps covering that year.

    Parameters
    ----------
    year : int or str
        year of interest

    Returns
    -------
    tuple
        2-element tuple (start, stop), where start is Jan 1 00:00:00 and
        stop is Dec 31 23:59:59 of the input year
    """
    start = pya.helpers.to_pandas_timestamp(year)
    stop = pya.helpers.to_pandas_timestamp('{}-12-31 23:59:59'.format(year))
    return (start, stop)


if __name__ == "__main__":
    exceptions = []
    pya.change_verbosity('warning')

    obs_reader = pya.io.ReadUngridded()
    obs_data = obs_reader.read(OBS_ID, VARS)

    model_reader = pya.io.ReadGridded(MODEL_ID)

    # variables that are requested AND available in both model and obs data
    var_matches = list(
        reduce(np.intersect1d, (VARS, model_reader.vars, obs_data.contains_vars)))

    if len(var_matches) == 0:
        raise pya.exceptions.DataCoverageError(
            'No variable matches between '
            '{} and {} for input vars: {}'.format(MODEL_ID, OBS_ID, VARS))
def plotscatter(model_name, model_data=None, obs_data=None, opts=None, verbose=True):
    """Plot a scatter plot of collocated model vs. observation data.

    Collocates gridded model data with station time series, computes
    evaluation statistics (NMB, MNMB, R, RMS, FGE) and saves the scatter
    plot to a png file named after model, variable, years and network.

    Parameters
    ----------
    model_name : str
        name of the model (used for labelling and the output file name)
    model_data : GriddedData, optional
        gridded model data; must provide ``to_time_series``
    obs_data : UngriddedData, optional
        observation data; must provide ``to_timeseries``
    opts : dict, optional
        run options; keys 'VariablesToRun', 'ObsNetworkName', 'StartDate'
        and 'EndDate' are accessed here
    verbose : bool
        if True, verbosity level is set to debug

    Todo
    ----
    Review code (``DataFrame.append`` is deprecated in recent pandas;
    ``plt.axes()`` is called repeatedly where one axes handle would do)
    """
    if verbose:
        change_verbosity(new_level='debug')
    plt_name = 'SCATTERLOG'
    var_to_run = opts['VariablesToRun'][0]
    # global settings (including plot settings) for variable
    VAR_PARAM = const.VAR_PARAM[var_to_run]
    obs_network_name = opts['ObsNetworkName'][0]
    obs_data_as_series = obs_data.to_timeseries(start_date=opts['StartDate'],
                                                end_date=opts['EndDate'],
                                                freq='D')
    obs_lats = [obs_data_as_series[i]['latitude']
                for i in range(len(obs_data_as_series))]
    obs_lons = [obs_data_as_series[i]['longitude']
                for i in range(len(obs_data_as_series))]
    obs_names = [obs_data_as_series[i]['station_name']
                 for i in range(len(obs_data_as_series))]
    # interpolate the model fields to the station locations
    model_data_as_series = model_data.to_time_series([("latitude", obs_lats),
                                                      ("longitude", obs_lons)])
    df_time = pd.DataFrame()
    df_points = pd.DataFrame()
    station_no = 0
    for i in range(len(obs_data_as_series)):
        _len = len(obs_data_as_series[i][var_to_run])
        if _len > 0:
            _nansum = np.nansum(obs_data_as_series[i][var_to_run])
            if _nansum > np.float_(0.):
                station_no += 1
            else:
                # NOTE: station is only reported, not skipped; NaN rows are
                # dropped further below via dropna
                print('{} removed due to NaNs only'.format(obs_names[i]))
        else:
            continue
        # put obs and model in DataFrame to make them use the same time index
        df_time_temp = pd.DataFrame(obs_data_as_series[i][var_to_run],
                                    columns=[obs_network_name])
        df_points = df_points.append(df_time_temp)
        df_time_temp[model_name] = (model_data_as_series[i][var_to_run] *
                                    VAR_PARAM['scat_scale_factor'])
        # df_time has now all time steps where either one of the obs or
        # model data have data
        df_time = df_time.append(
            pd.DataFrame(df_time_temp, columns=df_time_temp.columns))

    # remove all indices where either one of the data pairs is NaN;
    # mainly done to get the number of days right
    df_time = df_time.dropna(axis=0, how='any')
    df_points = df_points.dropna()
    print('# of measurements: {}'.format(len(df_points)))

    filter_name = 'WORLD'
    time_step_name = 'mALLYEARdaily'
    # e.g. OD550_AER_an2008_YEARLY_WORLD_SCATTERLOG_AeronetSunV3Lev2.0.daily.ps.png
    years_covered = df_time[model_name].index[:].year.unique().sort_values()
    if len(years_covered) > 1:
        figname = '{}_{}_an{}-{}_{}_{}_{}_{}.png'.format(
            model_name, var_to_run, years_covered[0], years_covered[-1],
            time_step_name, filter_name, plt_name, obs_network_name)
        plotname = "{}-{} {}".format(years_covered[0], years_covered[-1],
                                     'daily')
    else:
        figname = '{}_{}_an{}_{}_{}_{}_{}.png'.format(
            model_name, var_to_run, years_covered[0], time_step_name,
            filter_name, plt_name, obs_network_name)
        plotname = "{} {}".format(years_covered[0], 'daily')
    logger.info(figname)

    mean = df_time.mean()
    correlation_coeff = df_time.corr()
    # statistics following the IDL reference implementation:
    #   rms  = sqrt(total((f_YData-f_Xdata)^2)/n_elements(f_YData))
    #   nmb  = total(f_YData-f_Xdata)/total(f_Xdata)*100.
    #   f_temp = (f_YData-f_Xdata)/(f_YData+f_Xdata)
    #   mnmb = 2./c*total(f_temp)*100.
    #   fge  = 2./c*total(abs(f_temp))*100.
    difference = df_time[model_name] - df_time[obs_network_name]
    num_points = len(df_time)
    rms = np.sqrt(np.nansum(np.power(difference.values, 2)) / num_points)
    nmb = np.sum(difference) / np.sum(df_time[obs_network_name]) * 100.
    tmp = (df_time[model_name] - df_time[obs_network_name]) / (
        df_time[model_name] + df_time[obs_network_name])
    mnmb = 2. / num_points * np.sum(tmp) * 100.
    # BUGFIX: was `2. / np.sum(np.abs(tmp)) * 100.`, which does not match
    # the reference formula fge = 2./c*total(abs(f_temp))*100. above (the
    # division by the number of points was missing)
    fge = 2. / num_points * np.sum(np.abs(tmp)) * 100.

    df_time.plot.scatter(obs_network_name, model_name,
                         loglog=VAR_PARAM['scat_loglog'], marker='+',
                         color='black')
    # plot the 1 by 1 line
    plt.plot(VAR_PARAM['scat_xlim'], VAR_PARAM['scat_ylim'], '-',
             color='grey')
    plt.axes().set_aspect('equal')
    plt.xlim(VAR_PARAM['scat_xlim'])
    plt.ylim(VAR_PARAM['scat_ylim'])

    # annotate the statistics at the positions given by module-level xypos
    xypos_index = 0
    var_str = var_to_run + VAR_PARAM.unit_str
    plt.axes().annotate("{} #: {} # st: {}".format(var_str, len(df_time),
                                                   station_no),
                        xy=xypos[xypos_index], xycoords='axes fraction',
                        fontsize=14, color='red')
    xypos_index += 1
    plt.axes().annotate('Obs: {:.3f}'.format(mean[obs_network_name]),
                        xy=xypos[xypos_index], xycoords='axes fraction',
                        fontsize=10, color='red')
    xypos_index += 1
    plt.axes().annotate('Mod: {:.3f}'.format(mean[model_name]),
                        xy=xypos[xypos_index], xycoords='axes fraction',
                        fontsize=10, color='red')
    xypos_index += 1
    plt.axes().annotate('NMB: {:.1f}%'.format(nmb),
                        xy=xypos[xypos_index], xycoords='axes fraction',
                        fontsize=10, color='red')
    xypos_index += 1
    plt.axes().annotate('MNMB: {:.1f}%'.format(mnmb),
                        xy=xypos[xypos_index], xycoords='axes fraction',
                        fontsize=10, color='red')
    xypos_index += 1
    plt.axes().annotate('R: {:.3f}'.format(correlation_coeff.values[0, 1]),
                        xy=xypos[xypos_index], xycoords='axes fraction',
                        fontsize=10, color='red')
    xypos_index += 1
    plt.axes().annotate('RMS: {:.3f}'.format(rms),
                        xy=xypos[xypos_index], xycoords='axes fraction',
                        fontsize=10, color='red')
    xypos_index += 1
    plt.axes().annotate('FGE: {:.3f}'.format(fge),
                        xy=xypos[xypos_index], xycoords='axes fraction',
                        fontsize=10, color='red')
    # right lower part
    plt.axes().annotate('{}'.format(plotname), xy=xypos[-2],
                        xycoords='axes fraction', ha='center', fontsize=10,
                        color='black')
    plt.axes().annotate('{}'.format(filter_name), xy=xypos[-1],
                        xycoords='axes fraction', ha='center', fontsize=10,
                        color='black')

    plt.savefig(figname, dpi=300)
    plt.close()
# jupytext: # formats: ipynb,py:percent # text_representation: # extension: .py # format_name: percent # format_version: '1.2' # jupytext_version: 1.2.4 # kernelspec: # display_name: Python 3 # language: python # name: python3 # --- # %% import xarray as xr import numpy as np import matplotlib.pyplot as plt import cartopy.crs as ccrs import seaborn as sns import pandas as pd import pyaerocom as pya import franzihe_functions as fct from glob import glob # %% pya.change_verbosity('critical', log=pya.const.print_log) # don't output warnings pya.__version__ # %%