Exemplo n.º 1
0
def browse_database(model_or_obs, verbose=False):
    """Browse Aerocom database using model or obs ID (or wildcard)

    Searches database for matches and prints information about all matches
    found (e.g. available variables, years, etc.)

    Parameters
    ----------
    model_or_obs : str
        model or obs ID or search pattern
    verbose : bool
        if True, verbosity level will be set to debug, else to critical

    Returns
    -------
    list
        list with data_ids of all matches (empty list if no match was found)

    Example
    -------
    >>> import pyaerocom as pya
    >>> pya.io.browse_database('AATSR*ORAC*v4*')
    Pyaerocom ReadGridded
    ---------------------
    Model ID: AATSR_ORAC_v4.01
    Data directory: /lustre/storeA/project/aerocom/aerocom-users-database/CCI-Aerosol/CCI_AEROSOL_Phase2/AATSR_ORAC_v4.01/renamed
    Available variables: ['abs550aer', 'ang4487aer', 'clt', 'landseamask', 'od550aer', 'od550dust', 'od550gt1aer', 'od550lt1aer', 'pixelcount']
    Available years: [2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012]
    Available time resolutions ['daily']

    """
    # suppress pyaerocom log output unless explicitly requested
    if not verbose:
        change_verbosity('critical')
    else:
        change_verbosity('debug')
    browser = AerocomBrowser()
    matches = browser.find_matches(model_or_obs)
    if len(matches) == 0:
        print('No match could be found for {}'.format(model_or_obs))
        # return the empty list (was: None) so the return type always
        # matches the documented contract
        return matches
    elif len(matches) > 20:
        print('Found more than 20 matches for input pattern {}:\n\n'
              'Matches: {}\n\n'
              'To receive more detailed information, please specify search ID '
              'more accurately'.format(model_or_obs, matches))
        # return the matches anyway so callers can refine their pattern
        return matches
    for match in matches:
        try:
            # ungridded observation networks require a dedicated reader
            if match in const.OBS_IDS_UNGRIDDED:
                reader = ReadUngridded(match)
            else:
                reader = ReadGridded(match)
            print(reader)
        except Exception as e:
            print('Reading failed for {}. Error: {}'.format(match, repr(e)))
    return matches
Exemplo n.º 2
0
        f.write('\n')


if __name__ == "__main__":
    # start from a clean statistics output file
    if os.path.exists(OUT_STATS):
        os.remove(OUT_STATS)

    plt.close('all')
    helpers.print_file(MODEL_INFO_FILE)
    ### OPTIONS (0/1 flags controlling which parts of the script run)
    RUN_EVAL = 1       # run the evaluation section below
    RELOAD = 1         # re-import model and obs data
    TEST_FIRST = 0     # presumably: only process first entry for testing — TODO confirm
    PLOT_STATIONS = 0  # presumably: create per-station plots — TODO confirm

    # silence pyaerocom log output for the run
    pya.change_verbosity('critical')
    if RUN_EVAL:
        ### DATA IMPORT
        if RELOAD:
            print('Importing model and obs data, this could take some time')
            ### Read gridded model data
            read_models = pya.io.ReadGriddedMulti(MODEL_LIST)
            read_models.read_individual_years(VARS, YEARS)

            ### Read gridded obs data
            read_gridded_obs = pya.io.ReadGriddedMulti(GRIDDED_OBS_NETWORKS)
            read_gridded_obs.read_individual_years(VARS, YEARS)

            read_ungridded_obs = pya.io.ReadUngridded()
            read_ungridded_obs.logger.setLevel(logging.INFO)
            # Load networks individually for now (easier for analysis below)
Exemplo n.º 3
0
    rg_avail = False

# pytest skip markers for tests that depend on optional libraries / data
rg_unavail = pytest.mark.skipif(not rg_avail,
                   reason='Skipping tests that require access to reverse_geocode')

etopo1_unavail = pytest.mark.skipif(not const.ETOPO1_AVAILABLE,
                   reason='Skipping tests that require access to ETOPO1 data')
# unconditional skip: use a plain True, not the `True==True` tautology
always_skipped = pytest.mark.skipif(True, reason='Seek the answer')

testdata_unavail = pytest.mark.skipif(not TESTDATA_AVAIL,
                    reason='Skipping tests that require testdata-minimal.')

test_not_working = pytest.mark.skip(reason='Method raises Exception')

from pyaerocom import change_verbosity
# keep pyaerocom quiet during test collection / execution
change_verbosity('critical', const.print_log)
### Fixtures representing data

# Paths to EMEP data
@pytest.fixture(scope='session')
def path_emep():
    """Paths to the EMEP test files (daily/monthly/yearly) and their directory."""
    base = TESTDATADIR.joinpath(CHECK_PATHS['emep'])
    return {
        'daily': str(base.joinpath('Base_day.nc')),
        'monthly': str(base.joinpath('Base_month.nc')),
        'yearly': str(base.joinpath('Base_fullrun.nc')),
        'data_dir': str(base),
    }

# Example GriddedData object (TM5 model)
@pytest.fixture(scope='session')
Exemplo n.º 4
0
                if not var in data_obj.var_idx:
                    data_obj.var_idx[var] = var_idx
            metadata[meta_key]['variables'] = vars_avail
            idx += totnum
            meta_key = meta_key + 1.

        # shorten data_obj._data to the right number of points
        data_obj._data = data_obj._data[:idx]
        data_obj = data_obj.merge_common_meta()
        data_obj.data_revision[self.DATASET_NAME] = self.data_revision
        self.data = data_obj
        return data_obj

if __name__ == "__main__":
    from pyaerocom import change_verbosity
    change_verbosity('critical')

    # read a small sample of EBAS files for one variable and inspect results
    ebas_reader = ReadEbas()

    variables = ['absc550aer']

    file_list = ebas_reader.get_file_list(variables)

    ungridded = ebas_reader.read(variables, last_file=10)

    first_station = ungridded.to_station_data(0)

    print(first_station)
    print(ungridded)

    common_idx, common_meta = ungridded._find_common_meta()
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import scipy.stats as stats
import simplejson as json
import numpy as np
import datetime
import copy
import sys
import pwlf
import seaborn as sns
import pickle
import os

# output directory for trend results (relative to this script's location)
path_out = '../../aerosoltrends/data/test/'
#pya.change_verbosity('error')
# silence both pyaerocom loggers for this run
pya.change_verbosity('critical', pya.const.logger)
pya.change_verbosity('critical', pya.const.print_log)

def get_params():
    # computation parameters
    params = {
        'min_dobs': 300, # minimum number of daily observations available in order to keep the station
        'min_ntrend': 7, #minimum number of points used  to compute a trend
        'min_nstat': 2,  # minimum number of stations required to compute median
        'sig': 0.95,  # significance
        'min_dim': 5,  # minimum number of days required to compute monthly mean
        'min_mis': 1,  # minimum number of months required to compute seasonal mean
        'min_siy': 4,  # minimum number of seasons required to compute annual mean
        'nseg': 2,  # number of segments if no significant linear trend on the time series is found
        # if use same segments for model and bias than the ones found in obs (to be run before)
        'use_obs_seg': True,
# base string for plot file names; placeholder filled in later — TODO confirm usage
_PLOTNAME_BASESTR = 'mALLYEAR{}'

# temporal resolution identifiers taken from the pyaerocom grid I/O config
TS_TYPES = pya.const.GRID_IO.TS_TYPES


def start_stop_from_year(year):
    """Return a (start, stop) tuple of pandas timestamps covering *year*."""
    year_end = '{}-12-31 23:59:59'.format(year)
    return (pya.helpers.to_pandas_timestamp(year),
            pya.helpers.to_pandas_timestamp(year_end))


if __name__ == "__main__":

    exceptions = []
    pya.change_verbosity('warning')

    # import ungridded observations for the requested variables
    obs_reader = pya.io.ReadUngridded()
    obs_data = obs_reader.read(OBS_ID, VARS)

    model_reader = pya.io.ReadGridded(MODEL_ID)

    # variables present in the request AND the model AND the obs data
    var_matches = list(
        reduce(np.intersect1d,
               (VARS, model_reader.vars, obs_data.contains_vars)))

    if len(var_matches) == 0:
        raise pya.exceptions.DataCoverageError(
            'No variable matches between '
            '{} and {} for input vars: {}'.format(MODEL_ID, OBS_ID, VARS))
Exemplo n.º 7
0
def plotscatter(model_name,
                model_data=None,
                obs_data=None,
                opts=None,
                verbose=True):
    """Plot a model-vs-obs scatter plot and annotate it with statistics

    Collocates gridded model data with station time series, computes basic
    skill statistics (NMB, MNMB, FGE, RMS, R) and saves the scatter plot
    as a PNG file whose name encodes model, variable, years and obs network.

    Parameters
    ----------
    model_name : str
        name of the model (used for figure annotation and file name)
    model_data : GriddedData, optional
        gridded model data to be collocated with the observations
    obs_data : UngriddedData, optional
        ungridded observation data (must support ``to_timeseries``)
    opts : dict, optional
        run options; keys used here: 'VariablesToRun', 'ObsNetworkName',
        'StartDate', 'EndDate'
    verbose : bool
        if True, verbosity level will be set to debug

    Todo
    ----

    Complete docstring, review code

    """
    if verbose:
        change_verbosity(new_level='debug')

    plt_name = 'SCATTERLOG'
    var_to_run = opts['VariablesToRun'][0]

    # global settings (including plot settings) for variable
    VAR_PARAM = const.VAR_PARAM[var_to_run]

    obs_network_name = opts['ObsNetworkName'][0]
    # daily station time series within the requested period
    obs_data_as_series = obs_data.to_timeseries(start_date=opts['StartDate'],
                                                end_date=opts['EndDate'],
                                                freq='D')
    obs_lats = [
        obs_data_as_series[i]['latitude']
        for i in range(len(obs_data_as_series))
    ]
    obs_lons = [
        obs_data_as_series[i]['longitude']
        for i in range(len(obs_data_as_series))
    ]
    obs_names = [
        obs_data_as_series[i]['station_name']
        for i in range(len(obs_data_as_series))
    ]

    # model time series at the observation station coordinates
    model_data_as_series = model_data.to_time_series([("latitude", obs_lats),
                                                      ("longitude", obs_lons)])

    df_time = pd.DataFrame()
    df_points = pd.DataFrame()
    station_no = 0
    for i in range(len(obs_data_as_series)):
        _len = len(obs_data_as_series[i][var_to_run])
        if _len > 0:
            _nansum = np.nansum(obs_data_as_series[i][var_to_run])
            if _nansum > np.float_(0.):
                station_no += 1
            else:
                # NOTE(review): stations with only NaNs are reported here but
                # their data is still appended below — TODO confirm intended
                print('{} removed due to NaNs only'.format(obs_names[i]))
        else:
            continue
        # put obs and model in DataFrame to make them use the same time index
        df_time_temp = pd.DataFrame(obs_data_as_series[i][var_to_run],
                                    columns=[obs_network_name])
        df_points = df_points.append(df_time_temp)
        df_time_temp[model_name] = (model_data_as_series[i][var_to_run] *
                                    VAR_PARAM['scat_scale_factor'])
        # df_time has now all time steps where either one of the obs or
        # model data have data
        df_time = df_time.append(
            pd.DataFrame(df_time_temp, columns=df_time_temp.columns))

    # remove all indices where either one of the data pairs is NaN
    # mainly done to get the number of days right.
    # df_time.corr() gets it right without
    df_time = df_time.dropna(axis=0, how='any')
    df_points = df_points.dropna()
    print('# of measurements: {}'.format(len(df_points)))

    filter_name = 'WORLD-wMOUNTAINS'
    filter_name = 'WORLD'
    time_step_name = 'mALLYEARdaily'
    # figure name encodes model, variable, year range, time step, filter,
    # plot type and obs network, e.g.
    # OD550_AER_an2008_YEARLY_WORLD_SCATTERLOG_AeronetSunV3Lev2.0.daily.ps.png
    years_covered = df_time[model_name].index[:].year.unique().sort_values()
    if len(years_covered) > 1:
        figname = '{}_{}_an{}-{}_{}_{}_{}_{}.png'.format(
            model_name, var_to_run, years_covered[0], years_covered[-1],
            time_step_name, filter_name, plt_name, obs_network_name)
        plotname = "{}-{} {}".format(years_covered[0], years_covered[-1],
                                     'daily')
    else:
        figname = '{}_{}_an{}_{}_{}_{}_{}.png'.format(model_name, var_to_run,
                                                      years_covered[0],
                                                      time_step_name,
                                                      filter_name, plt_name,
                                                      obs_network_name)
        plotname = "{} {}".format(years_covered[0], 'daily')

    logger.info(figname)

    mean = df_time.mean()
    correlation_coeff = df_time.corr()
    # Reference formulas (IDL):
    # rms=sqrt(total((f_YData-f_Xdata)^2)/n_elements(f_YData))
    # nmb=total(f_YData-f_Xdata)/total(f_Xdata)*100.
    # c=n_elements(f_YData)
    # f_temp=(f_YData-f_Xdata)/(f_YData+f_Xdata)
    # mnmb=2./c*total(f_temp)*100.
    # fge=2./c*total(abs(f_temp))*100.
    difference = df_time[model_name] - df_time[obs_network_name]
    num_points = len(df_time)
    rms = np.sqrt(np.nansum(np.power(difference.values, 2)) / num_points)
    nmb = np.sum(difference) / np.sum(df_time[obs_network_name]) * 100.
    tmp = (df_time[model_name] - df_time[obs_network_name]) / (
        df_time[model_name] + df_time[obs_network_name])
    mnmb = 2. / num_points * np.sum(tmp) * 100.
    # fractional gross error: 2/N * sum(|f_temp|) * 100 (see IDL reference
    # above); previously computed as 2./sum(|tmp|)*100., dropping the
    # division by the number of points
    fge = 2. / num_points * np.sum(np.abs(tmp)) * 100.

    df_time.plot.scatter(obs_network_name,
                         model_name,
                         loglog=VAR_PARAM['scat_loglog'],
                         marker='+',
                         color='black')
    # plot the 1 by 1 line
    plt.plot(VAR_PARAM['scat_xlim'], VAR_PARAM['scat_ylim'], '-', color='grey')
    plt.axes().set_aspect('equal')

    plt.xlim(VAR_PARAM['scat_xlim'])
    plt.ylim(VAR_PARAM['scat_ylim'])
    # annotate the statistics in the upper part of the plot; positions are
    # taken from the module-level `xypos` list (axes-fraction coordinates)
    xypos_index = 0
    var_str = var_to_run + VAR_PARAM.unit_str
    plt.axes().annotate("{} #: {} # st: {}".format(var_str, len(df_time),
                                                   station_no),
                        xy=xypos[xypos_index],
                        xycoords='axes fraction',
                        fontsize=14,
                        color='red')
    xypos_index += 1
    plt.axes().annotate('Obs: {:.3f}'.format(mean[obs_network_name]),
                        xy=xypos[xypos_index],
                        xycoords='axes fraction',
                        fontsize=10,
                        color='red')
    xypos_index += 1
    plt.axes().annotate('Mod: {:.3f}'.format(mean[model_name]),
                        xy=xypos[xypos_index],
                        xycoords='axes fraction',
                        fontsize=10,
                        color='red')
    xypos_index += 1
    plt.axes().annotate('NMB: {:.1f}%'.format(nmb),
                        xy=xypos[xypos_index],
                        xycoords='axes fraction',
                        fontsize=10,
                        color='red')
    xypos_index += 1
    plt.axes().annotate('MNMB: {:.1f}%'.format(mnmb),
                        xy=xypos[xypos_index],
                        xycoords='axes fraction',
                        fontsize=10,
                        color='red')
    xypos_index += 1
    plt.axes().annotate('R: {:.3f}'.format(correlation_coeff.values[0, 1]),
                        xy=xypos[xypos_index],
                        xycoords='axes fraction',
                        fontsize=10,
                        color='red')
    xypos_index += 1
    plt.axes().annotate('RMS: {:.3f}'.format(rms),
                        xy=xypos[xypos_index],
                        xycoords='axes fraction',
                        fontsize=10,
                        color='red')
    xypos_index += 1
    plt.axes().annotate('FGE: {:.3f}'.format(fge),
                        xy=xypos[xypos_index],
                        xycoords='axes fraction',
                        fontsize=10,
                        color='red')
    # right lower part
    plt.axes().annotate('{}'.format(plotname),
                        xy=xypos[-2],
                        xycoords='axes fraction',
                        ha='center',
                        fontsize=10,
                        color='black')
    plt.axes().annotate('{}'.format(filter_name),
                        xy=xypos[-1],
                        xycoords='axes fraction',
                        ha='center',
                        fontsize=10,
                        color='black')

    plt.savefig(figname, dpi=300)
    plt.close()
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.2'
#       jupytext_version: 1.2.4
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
# ---

# %%
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import seaborn as sns
import pandas as pd
import pyaerocom as pya
import franzihe_functions as fct
from glob import glob

# %%
# silence pyaerocom log messages in the notebook output
pya.change_verbosity('critical',
                     log=pya.const.print_log)  # don't output warnings
pya.__version__  # displayed by the notebook cell

# %%