Exemplo n.º 1
0
def test_save_to_msgpack():
    """Preprocess the full SIMAR series and persist it as msgpack when gap-free."""
    # Input/output locations (note: hard-coded Windows absolute output path)
    data_file = os.path.join('simar', 'SIMAR_1052046')
    file_name = 'full_simar_preprocessed.msg'
    path = 'D:\\REPOSITORIO GIT\\protocol_project\\data\\intermediate_files'

    # Load the raw SIMAR data
    simar_raw, _ = read.simar(data_file, tests.full_data_path)

    # Infer the sampling step, drop fully-null rows, then interpolate gaps
    step = missing_values.find_timestep(simar_raw, n_random_values=10)
    cleaned = missing_values.erase_null_values(simar_raw, method='all')
    interpolated = missing_values.fill_missing_values(
        cleaned,
        step,
        technique='interpolation',
        method='nearest',
        limit=720,
        limit_direction='both')

    # Only write the file when no gaps remain after interpolation
    remaining_gaps = missing_values.find_missing_values(interpolated, step)
    if remaining_gaps.empty:
        save_to_msgpack(interpolated, file_name, path)
Exemplo n.º 2
0
def test_fill_missing_values():
    """Check nearest-neighbour interpolation fills known gaps in the SIMAR series."""
    # Load the short-gap sample SIMAR file
    simar_dir = os.path.join(tests.sample_data_path, 'simar')
    # noinspection PyTypeChecker
    raw, _ = read.simar('SIMAR_1052046_short_gap', simar_dir)

    # Sampling step inferred from the data itself
    step = missing_values.find_timestep(raw)

    # Interpolate gaps, at most 24 steps in either direction
    filled = missing_values.fill_missing_values(raw,
                                                step,
                                                technique='interpolation',
                                                method='nearest',
                                                limit=24,
                                                limit_direction='both')

    # Spot-check interpolated values at known gap timestamps
    tol = 0.01
    assert filled.loc['1958-01-04 08', 'Hm0'] == pytest.approx(2.1, tol)
    assert filled.loc['1958-01-04 12', 'Tp'] == pytest.approx(10.5, tol)
    assert filled.loc['1958-01-04 14', 'Tp'] == pytest.approx(10.6, tol)
    assert filled.loc['1960-12-31 19', 'Hm0'] == pytest.approx(0.6, tol)
Exemplo n.º 3
0
def test_missing_values_report():
    """Build a gaps report, write it to CSV and plot the missing values."""
    # Report output location
    file_name = 'gaps_report.csv'
    path = os.path.join('.', '..', '..', 'report', 'tests', 'output', 'tables')

    # Load the short-gap sample SIMAR file
    simar_dir = os.path.join(tests.sample_data_path, 'simar')
    # noinspection PyTypeChecker
    raw, _ = read.simar('SIMAR_1052046_short_gap', simar_dir)

    # Infer the sampling step and locate the gaps
    step = missing_values.find_timestep(raw)
    gaps = missing_values.find_missing_values(raw, step)

    # Summarise the gaps and persist the report to file
    report = missing_values.missing_values_report(raw, gaps)
    missing_values.missing_values_report_to_file(report, file_name, path)

    # Visual check of the gaps over the Hm0 series
    missing_values.plot_missing_values(data=raw,
                                       data_column='Hm0',
                                       data_gaps=gaps,
                                       title='',
                                       var_name='Hm0',
                                       var_unit='m',
                                       fig_filename='',
                                       circular=False,
                                       label='Hm0')
Exemplo n.º 4
0
def test_find_timestep():
    """The inferred time step of the short SIMAR sample must be exactly one hour."""
    # Read simar file
    data_path = os.path.join(tests.sample_data_path, 'simar')
    # noinspection PyTypeChecker
    data_simar, _ = read.simar('SIMAR_1052046_short', data_path)

    t_step = missing_values.find_timestep(data_simar)

    # Bug fix: pandas exposes `Timedelta` (capitalised); `pd.timedelta` does
    # not exist and raised AttributeError before the comparison ever ran.
    assert t_step == pd.Timedelta(hours=1)
Exemplo n.º 5
0
def test_find_missing_values():
    """Detect gaps in the SIMAR series and plot their start/end positions."""
    # Load the short-gap sample SIMAR file
    simar_dir = os.path.join(tests.sample_data_path, 'simar')
    # noinspection PyTypeChecker
    raw, _ = read.simar('SIMAR_1052046_short_gap', simar_dir)

    # Infer the sampling step, then locate the gaps
    step = missing_values.find_timestep(raw)
    gaps = missing_values.find_missing_values(raw, step)

    # Plot the Hm0 series with black markers at every gap boundary
    fig = plt.figure()
    ax = fig.gca()

    ax.plot(raw.loc[:, 'Hm0'])
    for boundary in ('pos_ini', 'pos_fin'):
        ax.plot(raw.loc[gaps.loc[:, boundary], 'Hm0'],
                'k.',
                markersize=10)

    fig.show()
Exemplo n.º 6
0
def test_figuras_cuantiles_ajuste_no_estacionario():
    """Draw quantile figures for the non-stationary fit of offshore data.

    Loads wave, wind and sea-level-pressure MODF series, resamples each to
    an hourly grid, merges and gap-fills them, adds small noise (needed by
    the VAR model), pickles the merged series, then reads precomputed plot
    data from a pickle and renders the quantile figures.
    """

    #%% Input data
    # Initial year, number of years, minimum fraction of valid data per year
    # NOTE(review): anocomienzo/duracion/umbralano, ant, no_ord and no_sim are
    # defined here but not used in the visible body — presumably kept for
    # symmetry with sibling tests; verify before removing.
    anocomienzo, duracion, umbralano = (2018, 85, 0.5)
    # Type of fit (0-GUI, 1-stationary, 2-nonstationary)
    ant = [2, 2, 2, 2, 2, 2]
    # Fourier order for nonstationary analysis
    no_ord = [4, 4, 4, 4, 4, 4]
    # Number of simulations
    no_sim = 350
    # Type of fit functions (one per variable: hs, tp, dh, vv, dv, slp)
    fun = [st.lognorm, lognorm2, normtrun, st.weibull_min, normtrun, st.norm]
    # Number of normals (for the truncated-normal mixtures)
    no_norm = [False, False, 2, False, 2, False]

    #%% Read data

    # Import wave data (from simar offshore)
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_wave.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_wave = MetOceanDF.read_file(path_name)
    # Hourly resample
    modf_wave = modf_wave.resample('H').interpolate()

    # Import wind data (from simar)
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_wind.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_wind = MetOceanDF.read_file(path_name)
    # Hourly resample
    modf_wind = modf_wind.resample('H').interpolate()

    # Import sea level pressure (from era)
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_sea_level_pressure.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_slp = MetOceanDF.read_file(path_name)
    # Hourly resample
    modf_slp = modf_slp.resample('H').interpolate()

    # Group into a single dataframe and name the columns
    wave_wind = pd.concat([modf_wave, modf_wind, modf_slp], axis=1)
    wave_wind.columns = ['hs', 'tp', 'dh', 'vv', 'dv', 'slp']

    # Delete rows with no common values across the three sources
    wave_wind.dropna(how='any', inplace=True)

    #%% Preprocessing: find the time step, locate gaps, interpolate them
    t_step = missing_values.find_timestep(wave_wind)  # Find tstep
    data_gaps = missing_values.find_missing_values(wave_wind, t_step)
    wave_wind = missing_values.fill_missing_values(wave_wind,
                                                   t_step,
                                                   technique='interpolation',
                                                   method='nearest',
                                                   limit=16 * 24,
                                                   limit_direction='both')
    data_gaps_after = missing_values.find_missing_values(wave_wind, t_step)

    # Add noise for VAR (avoids singular covariance from repeated values)
    noise = np.random.rand(wave_wind.shape[0], wave_wind.shape[1]) * 1e-2
    wave_wind = wave_wind + noise

    # Save_to_pickle (written to the current working directory)
    wave_wind.to_pickle('wave_wind_offshore.p')

    # Group into list of single-column dataframes, one per variable
    df = list()
    df.append(pd.DataFrame(wave_wind['hs']))
    df.append(pd.DataFrame(wave_wind['tp']))
    df.append(pd.DataFrame(wave_wind['dh']))
    df.append(pd.DataFrame(wave_wind['vv']))
    df.append(pd.DataFrame(wave_wind['dv']))
    df.append(pd.DataFrame(wave_wind['slp']))

    d_frame = pd.DataFrame(wave_wind)

    # Load the precomputed analysis/plot data produced by an earlier run
    fichero = os.path.join('output', 'analisis', 'data_graph_offshore.p')
    with open(fichero, 'rb') as filehandle:
        # store the data as binary data stream
        data_graph = pickle.load(filehandle)

    # Plot the results (a wide range of plotting functions exists; see manual)
    plot_analisis.cuantiles_ne_paper(fun, *data_graph)
Exemplo n.º 7
0
def test_pdf_cdf_simulaciones():
    """Compare empirical CDFs of simulated series against the historic data.

    Loads the historic offshore series (wave, wind, sea-level pressure,
    astronomical tide, river discharge), preprocesses them, then overlays
    the ECDF of each simulated realisation (orange dotted) with the ECDF of
    the historic record (blue solid) on a 3x3 figure, saved as PDF and PNG.
    """
    #%% Input data
    # Number of simulation files to read
    no_sim = 100
    # Prepare the 3x3 figure grid; position (2, 1) is unused and removed
    plt.rcParams.update({'font.size': 12})
    fig3, axes3 = plt.subplots(3, 3, figsize=(12, 10))
    plt.delaxes(axes3[2, 1])
    #%% Read data

    # Import wave data (from simar offshore)
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_wave.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_wave = MetOceanDF.read_file(path_name)
    # Hourly resample
    modf_wave = modf_wave.resample('H').interpolate()

    # Import wind data (from simar)
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_wind.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_wind = MetOceanDF.read_file(path_name)
    # Hourly resample
    modf_wind = modf_wind.resample('H').interpolate()

    # Import sea level pressure (from era)
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_sea_level_pressure.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_slp = MetOceanDF.read_file(path_name)
    # Hourly resample
    modf_slp = modf_slp.resample('H').interpolate()

    # Import astronomical tide hindcast
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_astronomical_tide.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_at = MetOceanDF.read_file(path_name)
    # Hourly resample
    modf_at = modf_at.resample('H').interpolate()
    at_hindcast_df = pd.DataFrame(modf_at)

    # Import astronomical tide forecast
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_astronomical_tide_forecast.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_at_fc = MetOceanDF.read_file(path_name)
    # Hourly resample
    modf_at_fc = modf_at_fc.resample('H').interpolate()
    at_forecast_df = pd.DataFrame(modf_at_fc)

    # Group wave/wind/slp into a single dataframe and name the columns
    wave_wind = pd.concat([modf_wave, modf_wind, modf_slp], axis=1)
    wave_wind.columns = ['hs', 'tp', 'dh', 'vv', 'dv', 'slp']

    # Delete rows with no common values across the three sources
    wave_wind.dropna(how='any', inplace=True)

    # Read river discharge data
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_river_discharge.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_rd = MetOceanDF.read_file(path_name)

    # Group into dataframe
    river_discharge = pd.DataFrame(modf_rd)
    # Delete rows with missing values
    river_discharge.dropna(how='any', inplace=True)

    #%% Preprocessing: find the time step, locate gaps, interpolate them
    t_step = missing_values.find_timestep(wave_wind)  # Find tstep
    data_gaps = missing_values.find_missing_values(wave_wind, t_step)
    wave_wind = missing_values.fill_missing_values(wave_wind,
                                                   t_step,
                                                   technique='interpolation',
                                                   method='nearest',
                                                   limit=16 * 24,
                                                   limit_direction='both')
    data_gaps_after = missing_values.find_missing_values(wave_wind, t_step)

    # Add noise for VAR (avoids singular covariance from repeated values)
    noise = np.random.rand(wave_wind.shape[0], wave_wind.shape[1]) * 1e-2
    wave_wind = wave_wind + noise

    # Save_to_pickle (written to the current working directory)
    wave_wind.to_pickle('wave_wind_offshore.p')

    # Group into list of single-column dataframes, one per variable
    df = list()
    df.append(pd.DataFrame(wave_wind['hs']))
    df.append(pd.DataFrame(wave_wind['tp']))
    df.append(pd.DataFrame(wave_wind['dh']))
    df.append(pd.DataFrame(wave_wind['vv']))
    df.append(pd.DataFrame(wave_wind['dv']))
    df.append(pd.DataFrame(wave_wind['slp']))

    # Overlay the ECDF of every simulated realisation (orange dotted lines);
    # one panel per variable, axes configured identically on each pass.
    for i in tqdm(range(1, no_sim)):
        file_name_simar_sim = os.path.join(
            'output', 'simulacion', 'series_temporales',
            'wave_wind_slp_offshore_500',
            'wave_wind_slp_guadalete_offshore_sim_' + str(i).zfill(4) + '.txt')

        file_name_rd_sim = os.path.join(
            'output', 'simulacion', 'series_temporales',
            'descarga_fluvial_500',
            'descarga_fluvial_guadalete_sim_' + str(i).zfill(4) + '.txt')

        df_simar_sim = pd.read_table(file_name_simar_sim, index_col=0)
        df_rd_sim = pd.read_table(file_name_rd_sim, index_col=0)

        # Cdf Pdf: significant wave height
        data = df_simar_sim['hs']
        ecdf = empirical_distributions.ecdf_histogram(data)
        # epdf = empirical_distributions.epdf_histogram(data, bins=bins)
        axes3[0, 0].plot(ecdf.index,
                         ecdf,
                         color='tab:orange',
                         linestyle=':',
                         lw=3)
        axes3[0, 0].set_ylabel('CDF', fontsize=16)
        axes3[0, 0].set_xlabel('$H_{m0} (m)$', fontsize=16)
        axes3[0, 0].set_xticks([0, 5, 10])
        axes3[0, 0].set_yticks([0, 0.25, 0.5, 0.75, 1])
        axes3[0, 0].grid(True)
        axes3[0, 0].set_xlim([0, 10])
        axes3[0, 0].set_ylim([0, 1.05])

        # Peak period
        data = df_simar_sim['tp']
        ecdf = empirical_distributions.ecdf_histogram(data)
        # epdf = empirical_distributions.epdf_histogram(data, bins=bins)
        axes3[0, 1].plot(ecdf.index,
                         ecdf,
                         color='tab:orange',
                         linestyle=':',
                         lw=3)
        axes3[0, 1].set_ylabel('', fontsize=16)
        axes3[0, 1].set_xlabel('$T_{p} (s)$', fontsize=16)
        axes3[0, 1].set_xticks([0, 12, 24])
        axes3[0, 1].set_yticks([0, 0.25, 0.5, 0.75, 1])
        axes3[0, 1].set_yticklabels([])
        axes3[0, 1].grid(True)
        axes3[0, 1].set_xlim([0, 24])
        axes3[0, 1].set_ylim([0, 1.05])

        # Wave direction
        data = df_simar_sim['dh']
        ecdf = empirical_distributions.ecdf_histogram(data)
        # epdf = empirical_distributions.epdf_histogram(data, bins=bins)
        axes3[0, 2].plot(ecdf.index,
                         ecdf,
                         color='tab:orange',
                         linestyle=':',
                         lw=3)
        axes3[0, 2].set_ylabel('', fontsize=16)
        axes3[0, 2].set_xlabel('$w_{theta} (^\circ)$', fontsize=16)
        axes3[0, 2].set_xticks([0, 180, 360])
        axes3[0, 2].set_yticks([0, 0.25, 0.5, 0.75, 1])
        axes3[0, 2].set_yticklabels([])
        axes3[0, 2].grid(True)
        axes3[0, 2].set_xlim([0, 360])
        axes3[0, 2].set_ylim([0, 1.05])

        # Wind speed
        data = df_simar_sim['vv']
        ecdf = empirical_distributions.ecdf_histogram(data)
        # epdf = empirical_distributions.epdf_histogram(data, bins=bins)
        axes3[1, 0].plot(ecdf.index,
                         ecdf,
                         color='tab:orange',
                         linestyle=':',
                         lw=3)
        axes3[1, 0].set_ylabel('CDF', fontsize=16)
        axes3[1, 0].set_xlabel('$u_{10} (m/s)$', fontsize=16)
        axes3[1, 0].set_xticks([0, 15, 30])
        axes3[1, 0].set_yticks([0, 0.25, 0.5, 0.75, 1])
        axes3[1, 0].grid(True)
        axes3[1, 0].set_xlim([0, 30])
        axes3[1, 0].set_ylim([0, 1.05])

        # Wind direction
        data = df_simar_sim['dv']
        ecdf = empirical_distributions.ecdf_histogram(data)
        # epdf = empirical_distributions.epdf_histogram(data, bins=bins)
        axes3[1, 1].plot(ecdf.index,
                         ecdf,
                         color='tab:orange',
                         linestyle=':',
                         lw=3)
        axes3[1, 1].set_ylabel('', fontsize=16)
        axes3[1, 1].set_xlabel('$u_{\\theta} (^\circ)$', fontsize=16)
        axes3[1, 1].set_xticks([0, 180, 360])
        axes3[1, 1].set_yticks([0, 0.25, 0.5, 0.75, 1])
        axes3[1, 1].set_yticklabels([])
        axes3[1, 1].grid(True)
        axes3[1, 1].set_xlim([0, 360])
        axes3[1, 1].set_ylim([0, 1.05])

        # Sea level pressure
        data = df_simar_sim['slp']
        ecdf = empirical_distributions.ecdf_histogram(data)
        # epdf = empirical_distributions.epdf_histogram(data, bins=bins)
        axes3[1, 2].plot(ecdf.index,
                         ecdf,
                         color='tab:orange',
                         linestyle=':',
                         lw=3)
        axes3[1, 2].set_ylabel('', fontsize=16)
        axes3[1, 2].set_xlabel('$slp (mbar)$', fontsize=16)
        axes3[1, 2].set_yticks([0, 0.25, 0.5, 0.75, 1])
        axes3[1, 2].set_yticklabels([])
        axes3[1, 2].set_xticks([980, 1015, 1050])
        axes3[1, 2].grid(True)
        axes3[1, 2].set_xlim([980, 1050])
        axes3[1, 2].set_ylim([0, 1.05])

        # Astronomical tide forecast (same data on every loop iteration)
        data = at_forecast_df['Eta']
        ecdf = empirical_distributions.ecdf_histogram(data)
        # epdf = empirical_distributions.epdf_histogram(data, bins=bins)
        axes3[2, 0].plot(ecdf.index,
                         ecdf,
                         color='tab:orange',
                         linestyle=':',
                         lw=3)
        axes3[2, 0].set_ylabel('CDF', fontsize=16)
        axes3[2, 0].set_xlabel('$A_{AT} (m)$', fontsize=16)
        axes3[2, 0].set_xticks([-2, 0, 2])
        axes3[2, 0].set_yticks([0, 0.25, 0.5, 0.75, 1])
        axes3[2, 0].grid(True)
        axes3[2, 0].set_xlim([-2, 2.])
        axes3[2, 0].set_ylim([0, 1.05])

        axes3[2, 1].set_xticklabels([])
        axes3[2, 1].set_yticklabels([])

        # Historic river discharge (same data on every loop iteration;
        # NOTE(review): df_rd_sim is read above but never plotted — confirm)
        data = river_discharge['Q']
        ecdf = empirical_distributions.ecdf_histogram(data)
        # epdf = empirical_distributions.epdf_histogram(data, bins=bins)
        axes3[2, 2].plot(ecdf.index,
                         ecdf,
                         color='tab:orange',
                         linestyle=':',
                         lw=3)
        axes3[2, 2].set_ylabel('', fontsize=16)
        axes3[2, 2].set_xlabel('$Q (m^{3}/s)$', fontsize=16)
        axes3[2, 2].set_xticks([0, 250, 500])
        axes3[2, 2].set_yticks([0, 0.25, 0.5, 0.75, 1])
        axes3[2, 2].set_yticklabels([])
        axes3[2, 2].grid(True)
        axes3[2, 2].set_xlim([0, 500])
        axes3[2, 2].set_ylim([0, 1.05])

    # Overlay the historic ECDFs (blue solid lines), one per panel
    data = wave_wind['hs']
    paso = 0.1
    bins = np.max(data) / (paso * 2.0)
    ecdf = empirical_distributions.ecdf_histogram(data)
    epdf = empirical_distributions.epdf_histogram(data, bins=bins)
    axes3[0, 0].plot(ecdf.index, ecdf, color='tab:blue', lw=2)

    data = wave_wind['tp']
    paso = 1
    bins = np.max(data) / (paso * 2.0)
    ecdf = empirical_distributions.ecdf_histogram(data)
    epdf = empirical_distributions.epdf_histogram(data, bins=bins)
    axes3[0, 1].plot(ecdf.index, ecdf, color='tab:blue', lw=2)

    data = wave_wind['dh']
    paso = 12
    bins = np.max(data) / (paso * 2.0)
    ecdf = empirical_distributions.ecdf_histogram(data)
    epdf = empirical_distributions.epdf_histogram(data, bins=bins)
    axes3[0, 2].plot(ecdf.index, ecdf, color='tab:blue', lw=2)

    data = wave_wind['vv']
    paso = 0.1
    bins = np.max(data) / (paso * 2.0)
    ecdf = empirical_distributions.ecdf_histogram(data)
    epdf = empirical_distributions.epdf_histogram(data, bins=bins)
    axes3[1, 0].plot(ecdf.index, ecdf, color='tab:blue', lw=2)

    data = wave_wind['dv']
    bins = 8
    ecdf = empirical_distributions.ecdf_histogram(data)
    epdf = empirical_distributions.epdf_histogram(data, bins=bins)
    axes3[1, 1].plot(ecdf.index, ecdf, color='tab:blue', lw=2)

    data = wave_wind['slp']
    paso = 1
    bins = np.max(data) / (paso * 2.0)
    ecdf = empirical_distributions.ecdf_histogram(data)
    epdf = empirical_distributions.epdf_histogram(data, bins=bins)
    axes3[1, 2].plot(ecdf.index, ecdf, color='tab:blue', lw=2)

    data = at_forecast_df['Eta']
    ecdf = empirical_distributions.ecdf_histogram(data)
    axes3[2, 0].plot(ecdf.index, ecdf, color='tab:blue', lw=2)

    data = river_discharge['Q']
    paso = 1
    bins = np.max(data) / (paso * 2.0)
    ecdf = empirical_distributions.ecdf_histogram(data)
    axes3[2, 2].plot(ecdf.index, ecdf, color='tab:blue', lw=2)
    plt.tight_layout()

    # Save the figure in both vector and raster formats
    fig3.savefig(
        os.path.join('output', 'analisis', 'graficas',
                     'ecdf_historico_simulacion.pdf'))
    fig3.savefig(
        os.path.join('output', 'analisis', 'graficas',
                     'ecdf_historico_simulacion.png'))
Exemplo n.º 8
0
def test_river_discharge_simulation():
    # Modules activation and deactivation
    # analysis = False
    # cdf_pdf_representation = False
    # temporal_dependency = False
    # climatic_events_fitting = True
    # threshold_checking_for_simulation = False
    # simulation_cycles = True
    analysis = True
    cdf_pdf_representation = False
    temporal_dependency = False
    climatic_events_fitting = True
    threshold_checking_for_simulation = False
    simulation_cycles = True

    #%% Input data
    # Initial year, number of years, number of valid  data in a year
    anocomienzo, duracion, umbralano = (2018, 10, 0.8)
    # Type of fit (0-GUI, 1-stationary, 2-nonstationary)
    ant = [2]
    # Fourier order for nonstationary analysis
    no_ord_cycles = [2]
    no_ord_calms = [2]
    # Number of simulations
    no_sim = 1
    # Type of fit functions
    fun_cycles = [st.exponweib]
    fun_calms = [st.norm]
    # Number of normals
    no_norm_cycles = [False]
    no_norm_calms = [False]
    f_mix_cycles = [False]
    mod_cycles = [[0, 0, 0, 0]]

    # Cycles River discharge
    threshold_cycles = 25
    # minimum_interarrival_time = pd.Timedelta('250 days')
    # minimum_cycle_length = pd.Timedelta('5 days')
    minimum_interarrival_time = pd.Timedelta('7 days')
    minimum_cycle_length = pd.Timedelta('2 days')

    # Cycles SPEI
    threshold_spei = 0
    minimum_interarrival_time_spei = pd.Timedelta('150 days')
    minimum_cycle_length_spei = pd.Timedelta('150 days')

    interpolation = True
    interpolation_method = 'linear'
    interpolation_freq = '1min'
    truncate = True
    extra_info = True

    #%% Read data
    # Import river discharge data when all dams were active
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_river_discharge.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_rd = MetOceanDF.read_file(path_name)

    # Group into dataframe
    river_discharge = pd.DataFrame(modf_rd)

    # Delete rows where with no common values
    river_discharge.dropna(how='any', inplace=True)

    # Import complete rive discharge historic data
    # All historic river discharge
    data_path = os.path.join(tests.current_path, '..', '..', '..', '..',
                             'data', 'solar_flux_nao_index_spei')
    modf_file_name = 'caudales.txt'
    path_name = os.path.join(data_path, modf_file_name)
    modf_all = pd.read_table(path_name, header=None, delim_whitespace=True)
    date_col = dates.extract_date(modf_all.iloc[:, 0:4])
    modf_all.index = date_col
    modf_all.drop(modf_all.columns[0:4], axis=1, inplace=True)
    modf_all.columns = ['Q']

    #%% Preprocessing
    t_step = missing_values.find_timestep(river_discharge)  # Find tstep
    data_gaps = missing_values.find_missing_values(river_discharge, t_step)
    river_discharge = missing_values.fill_missing_values(
        river_discharge,
        t_step,
        technique='interpolation',
        method='nearest',
        limit=16 * 24,
        limit_direction='both')
    data_gaps_after = missing_values.find_missing_values(
        river_discharge, t_step)

    # Add noise for VAR
    noise = np.random.rand(river_discharge.shape[0],
                           river_discharge.shape[1]) * 1e-2
    river_discharge = river_discharge + noise

    # Save_to_pickle
    river_discharge.to_pickle('river_discharge.p')

    # Group into list of dataframes
    df = list()
    df.append(pd.DataFrame(river_discharge['Q']))

    #%% Cycles and calms calculation
    cycles, calm_periods, info = extremal.extreme_events(
        river_discharge, 'Q', threshold_cycles, minimum_interarrival_time,
        minimum_cycle_length, interpolation, interpolation_method,
        interpolation_freq, truncate, extra_info)
    # Calculate duration of the cycles
    dur_cycles = extremal.events_duration(cycles)
    dur_cycles_description = dur_cycles.describe()

    sample_cycles = pd.DataFrame(info['data_cycles'].iloc[:, 0])
    noise = np.random.rand(sample_cycles.shape[0],
                           sample_cycles.shape[1]) * 1e-2
    sample_cycles = sample_cycles + noise

    sample_calms = pd.DataFrame(info['data_calm_periods'])
    noise = np.random.rand(sample_calms.shape[0], sample_calms.shape[1]) * 1e-2
    sample_calms = sample_calms + noise

    #%% CLIMATIC INDICES
    # Sunspots
    data_path = os.path.join(tests.current_path, '..', '..', '..', '..',
                             'data', 'solar_flux_nao_index_spei')
    modf_file_name = 'sunspot.csv'
    path_name = os.path.join(data_path, modf_file_name)
    sunspot = pd.read_csv(path_name,
                          header=None,
                          delim_whitespace=True,
                          parse_dates=[[0, 1]],
                          index_col=0)
    sunspot = sunspot.drop([2, 4, 5], axis=1)

    # SPEI
    data_path = os.path.join(tests.current_path, '..', '..', '..', '..',
                             'data', 'solar_flux_nao_index_spei')
    modf_file_name = 'spei_cadiz.csv'
    path_name = os.path.join(data_path, modf_file_name)
    spei = pd.read_csv(path_name, sep=',')
    spei.index = sunspot.index[2412:3233]

    # Calculate cycles over SPEI
    spei = pd.DataFrame(spei.loc[:, 'SPEI_12'] * 100).dropna()
    cycles_spei, calm_periods_spei, info_spei = extremal.extreme_events(
        spei, 'SPEI_12', threshold_spei, minimum_interarrival_time_spei,
        minimum_cycle_length_spei, interpolation, interpolation_method,
        interpolation_freq, truncate, extra_info)
    peaks_over_thres_spei = extremal.events_max(cycles_spei)

    # Plot peaks
    peaks_over_thres = extremal.events_max(cycles)

    # Represent cycles
    fig1 = plt.figure(figsize=(20, 20))
    ax = plt.axes()
    ax.plot(river_discharge)
    ax.axhline(threshold_cycles, color='lightgray')
    ax.plot(spei.loc[:, 'SPEI_12'] * 100, color='0.75', linewidth=2)
    # Plot cycles
    # for cycle in cycles_all:
    #     ax.plot(cycle, 'sandybrown', marker='.', markersize=5)
    #     # ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10)
    #     # ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10)
    for cycle in cycles:
        ax.plot(cycle, 'g', marker='.', markersize=5)
        # ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10)
        # ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10)
    for cycle in cycles_spei:
        ax.plot(cycle, 'k', marker='.', markersize=5, linewidth=2)
        ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=15)
        ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=15)
    ax.plot(peaks_over_thres, '.r', markersize=15)
    ax.plot(peaks_over_thres_spei, '.c', markersize=15)
    ax.grid()
    ax.set_xlim([datetime.date(1970, 01, 01), datetime.date(2018, 04, 11)])
    ax.set_ylim([-5, 500])
    fig1.savefig(
        os.path.join('output', 'analisis', 'graficas',
                     'ciclos_river_discharge_spei.png'))

    #%% # ANALISIS CLIMATICO (0: PARA SALTARLO, 1: PARA HACERLO; LO MISMO PARA TODOS ESTOS IF)
    if analysis:
        if cdf_pdf_representation:
            for i in range(len(df)):
                # DIBUJO LAS CDF Y PDF DE LOS REGISTROS
                plot_analisis.cdf_pdf_registro(df[i], df[i].columns[0])
                plt.pause(0.5)

        #%%  THEORETICAL FIT CYCLES
        data_cycles = sample_cycles['Q']

        # Empirical cdf
        ecdf = empirical_distributions.ecdf_histogram(data_cycles)
        # Fit the variable to an extremal distribution
        (param, x, cdf_expwbl, pdf_expwbl) = theoretical_fit.fit_distribution(
            data_cycles,
            fit_type=fun_cycles[0].name,
            x_min=min(data_cycles),
            x_max=2 * max(data_cycles),
            n_points=1000)
        par0_cycles = list()
        par0_cycles.append(np.asarray(param))
        # GUARDO LOS PARAMETROS
        np.save(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'), par0_cycles)

        # Check the goodness of the fit
        fig1 = plt.figure(figsize=(20, 20))
        ax = plt.axes()
        ax.plot(ecdf.index, ecdf, '.')
        ax.plot(x, cdf_expwbl)
        ax.set_xlabel('Q (m3/s)')
        ax.set_ylabel('CDF')
        ax.legend([
            'ECDF',
            'Exponweib Fit',
        ])
        ax.grid()
        ax.set_xlim([0, 500])
        fig1.savefig(
            os.path.join('output', 'analisis', 'graficas',
                         'cdf_fit_ciclos_river_discharge.png'))

        # PP - Plot values
        (yppplot_emp,
         yppplot_teo) = theoretical_fit.pp_plot(x, cdf_expwbl, ecdf)
        # QQ - Plot values
        (yqqplot_emp,
         yqqplot_teo) = theoretical_fit.qq_plot(x, cdf_expwbl, ecdf)
        # Plot Goodness of fit
        theoretical_fit.plot_goodness_of_fit(cdf_expwbl, ecdf, river_discharge,
                                             'Q', x, yppplot_emp, yqqplot_emp,
                                             yppplot_teo, yqqplot_teo)

        # Non-stationary fit for calms
        par_cycles, mod_cycles, f_mix_cycles, data_graph_cycles = list(), list(
        ), list(), list()
        df = list()
        df.append(data_cycles)
        for i in range(len(df)):
            # SE HAN SELECCIONADO LOS ULTIMOS 7 ANOS PARA QUE EL ANALISIS SEA MAS RAPIDO
            analisis_ = analisis.analisis(df[i],
                                          fun_cycles[i],
                                          ant[i],
                                          ordg=no_ord_cycles[i],
                                          nnorm=no_norm_cycles[i],
                                          par0=par0_cycles[i])

            par_cycles.append(analisis_[0])
            mod_cycles.append(analisis_[1])
            f_mix_cycles.append(analisis_[2])

            aux = list(analisis_[3])
            aux[5] = i
            aux = tuple(aux)
            data_graph_cycles.append(aux)

            # DIBUJO LOS RESULTADOS (HAY UNA GRAN GAMA DE FUNCIONES DE DIBUJO; VER MANUAL)
            plot_analisis.cuantiles_ne(*data_graph_cycles[i])
            plt.pause(0.5)

        fig2 = plt.figure(figsize=(20, 20))
        plt.plot(x, pdf_expwbl)
        _ = plt.hist(data_cycles,
                     bins=np.linspace(0, 500, 100),
                     normed=True,
                     alpha=0.5)
        plt.xlim([0, 400])
        fig2.savefig(
            os.path.join('output', 'analisis', 'graficas',
                         'pdf_fit_ciclos_river_discharge.png'))

        # %%  THEORETICAL FIT CALMS
        param0_calms = list()
        data_calms = sample_calms['Q']
        (param, x, cdf, pdf) = theoretical_fit.fit_distribution(
            data_calms,
            fit_type=fun_calms[0].name,
            x_min=np.min(data_calms),
            x_max=1.1 * np.max(data_calms),
            n_points=1000)
        param0_calms.append(np.asarray(param))
        # Empirical cdf
        ecdf = empirical_distributions.ecdf_histogram(data_calms)
        epdf = empirical_distributions.epdf_histogram(data_calms, bins=0)
        # PP - Plot values
        (yppplot_emp, yppplot_teo) = theoretical_fit.pp_plot(x, cdf, ecdf)
        # QQ - Plot values
        (yqqplot_emp, yqqplot_teo) = theoretical_fit.qq_plot(x, cdf, ecdf)
        # Plot Goodness of fit
        theoretical_fit.plot_goodness_of_fit(cdf, ecdf, sample_calms, 'Q', x,
                                             yppplot_emp, yqqplot_emp,
                                             yppplot_teo, yqqplot_teo)

        # Non-stationary fit for calms
        par_calms, mod_calms, f_mix_calms, data_graph_calms = list(), list(
        ), list(), list()
        df = list()
        df.append(data_calms)
        for i in range(len(df)):
            # SE HAN SELECCIONADO LOS ULTIMOS 7 ANOS PARA QUE EL ANALISIS SEA MAS RAPIDO
            analisis_ = analisis.analisis(df[i],
                                          fun_calms[i],
                                          ant[i],
                                          ordg=no_ord_calms[i],
                                          nnorm=no_norm_calms[i],
                                          par0=param0_calms[i])

            par_calms.append(analisis_[0])
            mod_calms.append(analisis_[1])
            f_mix_calms.append(analisis_[2])
            data_graph_calms.append(analisis_[3])

            # DIBUJO LOS RESULTADOS (HAY UNA GRAN GAMA DE FUNCIONES DE DIBUJO; VER MANUAL)
            plot_analisis.cuantiles_ne(*data_graph_calms[i])
            plt.pause(0.5)

        # Guardo parametros
        np.save(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_calms.npy'), par_calms)
        np.save(
            os.path.join('output', 'analisis',
                         'mod_river_discharge_calms.npy'), mod_calms)
        np.save(
            os.path.join('output', 'analisis',
                         'f_mix_river_discharge_calms.npy'), f_mix_calms)

    #%% TEMPORAL DEPENDENCY
    if temporal_dependency:
        # The output parameters of the previous analysis are reused here.
        # Load the saved fit parameters from disk.
        par_cycles = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'))
        par_calms = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_calms.npy'))
        mod_calms = np.load(
            os.path.join('output', 'analisis',
                         'mod_river_discharge_calms.npy'))
        f_mix_calms = np.load(
            os.path.join('output', 'analisis',
                         'f_mix_river_discharge_calms.npy'))

        # NOTE(review): mod_cycles, no_norm_cycles, f_mix_cycles and
        # fun_cycles are NOT reloaded from disk here (only their calms
        # counterparts are), so this branch seems to require running in the
        # same session as the fitting branch that defines them -- confirm.
        (df_dt_cycles,
         cdf_) = analisis.dependencia_temporal(sample_cycles, par_cycles,
                                               mod_cycles, no_norm_cycles,
                                               f_mix_cycles, fun_cycles)

        # Save the VAR model parameters
        df_dt_cycles.to_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_cycles.p'))

        (df_dt_calms,
         cdf_) = analisis.dependencia_temporal(sample_calms, par_calms,
                                               mod_calms, no_norm_calms,
                                               f_mix_calms, fun_calms)

        # Save the VAR model parameters
        df_dt_calms.to_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_calms.p'))

    if climatic_events_fitting:
        #%% FIT NUMBER OF EVENTS DURING WET CYCLES
        # Observed number of events per wet cycle, fitted with a Poisson
        # distribution whose rate (mu) is the sample mean
        events_wet_cycle = pd.Series([5, 2, 1, 3, 2, 2, 0, 6, 1])
        ecdf_events_wet_cycle = empirical_distributions.ecdf_histogram(
            events_wet_cycle)

        mu = np.mean(events_wet_cycle)
        simulated_number_events = pd.Series(
            poisson.rvs(mu, loc=0, size=100, random_state=None))
        ecdf_simulated_events_wet_cycle = empirical_distributions.ecdf_histogram(
            simulated_number_events)
        x_poisson = np.linspace(0, 10, 100)
        cdf_poisson = poisson.cdf(x_poisson, mu, loc=0)

        # Compare observed / simulated ECDFs against the Poisson cdf
        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_events_wet_cycle.index, ecdf_events_wet_cycle, '.')
        ax.plot(ecdf_simulated_events_wet_cycle.index,
                ecdf_simulated_events_wet_cycle, '.')
        ax.plot(x_poisson, cdf_poisson)
        ax.legend(['ECDF', 'ECDF Sim', 'Poisson Fit'])
        ax.grid()

        #%% FIT TIME BETWEEN WET CYCLES
        # Inter-arrival time between wet-cycle peaks, as float seconds
        t_wet_cycles = peaks_over_thres_spei.index.to_series().diff().dropna(
        ).astype('m8[s]').astype(np.float32)
        ecdf_t_wet_cycle = empirical_distributions.ecdf_histogram(t_wet_cycles)

        norm_param = norm.fit(t_wet_cycles, loc=0)
        simulated_t_wet_cycles = pd.Series(
            norm.rvs(*norm_param, size=100, random_state=None))
        ecdf_simulated_t_wet_cycles = empirical_distributions.ecdf_histogram(
            simulated_t_wet_cycles)
        x_norm = np.linspace(0, 2 * max(t_wet_cycles), 100)
        cdf_norm = norm.cdf(x_norm, *norm_param)

        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_t_wet_cycle.index, ecdf_t_wet_cycle, '.')
        ax.plot(ecdf_simulated_t_wet_cycles.index, ecdf_simulated_t_wet_cycles,
                '.')
        ax.plot(x_norm, cdf_norm)
        # BUG FIX: the fitted curve is a normal distribution (norm.fit
        # above); the legend previously mislabelled it as 'Exponential Fit'
        ax.legend(['ECDF', 'ECDF Sim', 'Normal Fit'])
        ax.grid()

        simulated_t_wet_cycles_days = simulated_t_wet_cycles.astype('m8[s]')
        # Keep only simulated gaps longer than one day (the normal fit can
        # produce negative or near-zero durations)
        simulated_t_wet_cycles_days = simulated_t_wet_cycles_days[
            simulated_t_wet_cycles_days.values > datetime.timedelta(days=1)]

        #%% FIT TIME BETWEEN EVENTS DURING WET CYCLES
        # Times between consecutive events, truncated at 400 days so gaps
        # between wet cycles are excluded from this sample
        t_between_events = peaks_over_thres.index.to_series().diff().dropna()
        t_between_events = t_between_events[
            t_between_events < datetime.timedelta(days=400)]
        t_between_events = t_between_events.astype('m8[s]').astype(np.float32)
        ecdf_t_between_events = empirical_distributions.ecdf_histogram(
            t_between_events)

        lambda_par = expon.fit(t_between_events, loc=0)
        simulated_t_between_events = pd.Series(
            expon.rvs(scale=lambda_par[1], size=100, random_state=None))
        ecdf_simulated_t_between_events = empirical_distributions.ecdf_histogram(
            simulated_t_between_events)
        x_expon = np.linspace(0, 2 * max(t_between_events), 100)
        cdf_expon = expon.cdf(x_expon, scale=lambda_par[1], loc=0)

        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_t_between_events.index, ecdf_t_between_events, '.')
        ax.plot(ecdf_simulated_t_between_events.index,
                ecdf_simulated_t_between_events, '.')
        ax.plot(x_expon, cdf_expon)
        ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit'])
        ax.grid()

        simulated_t_between_events_days = simulated_t_between_events.astype(
            'm8[s]')

        #%% FIT TIME BETWEEN ALL EVENTS
        # Fit time between events (without considering wet cycles), 2nd method
        t_between_events_2method = peaks_over_thres.index.to_series().diff(
        ).dropna()
        t_between_events_2method = t_between_events_2method.astype(
            'm8[s]').astype(np.float32)
        ecdf_t_between_events_2method = empirical_distributions.ecdf_histogram(
            t_between_events_2method)

        lambda_par = expon.fit(t_between_events_2method, loc=0)
        simulated_t_between_events_2method = pd.Series(
            expon.rvs(scale=lambda_par[1], size=100, random_state=None))
        ecdf_simulated_t_between_events_2method = empirical_distributions.ecdf_histogram(
            simulated_t_between_events_2method)
        x_expon = np.linspace(0, 2 * np.max(t_between_events_2method), 100)
        cdf_expon = expon.cdf(x_expon, scale=lambda_par[1], loc=0)

        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_t_between_events_2method.index,
                ecdf_t_between_events_2method, '.')
        ax.plot(ecdf_simulated_t_between_events_2method.index,
                ecdf_simulated_t_between_events_2method, '.')
        ax.plot(x_expon, cdf_expon)
        ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit'])
        ax.grid()

        # BUG FIX: the original converted simulated_t_between_events (the
        # within-cycle sample) instead of the 2nd-method sample computed here
        simulated_t_between_events_2method_days = (
            simulated_t_between_events_2method.astype('m8[s]'))
        # nul_values = simulated_t_between_events_2method_days.values > datetime.timedelta(days=2000)

    #%% CLIMATE SIMULATION: CHECK THE OPTIMAL THRESHOLD TO FIT DURATIONS
    if threshold_checking_for_simulation:
        # Load the parameters fitted in the analysis stage
        par_cycles = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'))
        df_dt_cycles = pd.read_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_cycles.p'))
        vars_ = ['Q']

        # The SPEI index is used to fit the time between wet cycles, the
        # number of events per wet cycle and the time between events within
        # a wet cycle

        # Figure for the empirical cdfs and pdfs
        fig1, axes1 = plt.subplots(1, 2, figsize=(20, 7))

        cont = 0
        n_iter = 0  # renamed from 'iter' to avoid shadowing the builtin
        while cont < no_sim:
            df_sim = simulacion.simulacion(anocomienzo,
                                           duracion,
                                           par_cycles,
                                           mod_cycles,
                                           no_norm_cycles,
                                           f_mix_cycles,
                                           fun_cycles,
                                           vars_,
                                           sample_cycles,
                                           df_dt_cycles, [0, 0, 0, 0, 0],
                                           semilla=int(
                                               np.random.rand(1) * 1e6))

            n_iter += 1

            # Discard the simulated series when any value exceeds 1.25 times
            # the maximum observed discharge
            if np.max(df_sim).values <= np.max(sample_cycles['Q']) * 1.25:
                # Plot the simulated series against the observed sample
                plt.figure()
                ax = plt.axes()
                ax.plot(df_sim)
                ax.plot(sample_cycles, '.')
                ax.plot(df_sim * 0 + max(sample_cycles['Q']), 'r')
                ax.grid()

                # Empirical cdf / pdf of the simulated series
                data = df_sim['Q']
                ecdf = empirical_distributions.ecdf_histogram(data)
                epdf = empirical_distributions.epdf_histogram(data, bins=0)
                axes1[0].plot(epdf.index, epdf, '--', color='0.75')
                axes1[1].plot(ecdf.index, ecdf, '--', color='0.75')

                # Extract cycles from data for different thresholds to fix
                # the duration
                fig2, axes2 = plt.subplots(1, 2, figsize=(20, 7))
                if cont == 0:
                    # Convert observed durations to float seconds (only once)
                    dur_cycles = dur_cycles.astype('m8[s]').astype(np.float32)
                ecdf_dur = empirical_distributions.ecdf_histogram(dur_cycles)
                epdf_dur = empirical_distributions.epdf_histogram(dur_cycles,
                                                                  bins=0)
                axes2[0].plot(epdf_dur.index, epdf_dur, 'r', lw=2)
                axes2[1].plot(ecdf_dur.index, ecdf_dur, 'r', lw=2)

                threshold = np.arange(20, 110, 10)
                color_sequence = [
                    '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c',
                    '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5',
                    '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f',
                    '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5'
                ]
                for j, th in enumerate(threshold):
                    minimum_interarrival_time = pd.Timedelta('1 hour')
                    minimum_cycle_length = pd.Timedelta('2 days')
                    cycles, calm_periods, info = extremal.extreme_events(
                        df_sim, 'Q', th, minimum_interarrival_time,
                        minimum_cycle_length, interpolation,
                        interpolation_method, interpolation_freq, truncate,
                        extra_info)

                    # Calculate duration of the cycles
                    dur_cycles_sim = extremal.events_duration(cycles)
                    dur_cycles_sim_description = dur_cycles_sim.describe()

                    # Represent cycles, annotating observed vs simulated
                    # duration statistics in the legend
                    fig3 = plt.figure(figsize=(20, 20))
                    ax = plt.axes()
                    ax.plot(df_sim)
                    ax.axhline(th, color='lightgray')
                    ax.grid()
                    ax.legend([
                        'Threshold: ' + str(th) + ' (m3/s)' + '/ Dur_min ' +
                        str(dur_cycles_description['min']) + ' - ' +
                        str(dur_cycles_sim_description['min']) +
                        '/ Dur_mean ' + str(dur_cycles_description['mean']) +
                        ' - ' + str(dur_cycles_sim_description['mean']) +
                        '/ Dur_max ' + str(dur_cycles_description['max']) +
                        ' - ' + str(dur_cycles_sim_description['max'])
                    ])

                    # Mark each cycle: start in gray, end in black
                    for cycle in cycles:
                        ax.plot(cycle, 'g', marker='.', markersize=5)
                        ax.plot(cycle.index[0],
                                cycle[0],
                                'gray',
                                marker='.',
                                markersize=10)
                        ax.plot(cycle.index[-1],
                                cycle[-1],
                                'black',
                                marker='.',
                                markersize=10)
                    # BUG FIX: leading-zero integer literals (04, 01) are a
                    # SyntaxError in Python 3
                    ax.set_xlim([
                        datetime.date(2018, 4, 1),
                        datetime.date(2030, 1, 1)
                    ])
                    ax.set_ylim([0, 600])

                    fig_name = 'ciclos_sim_' + str(cont) + '_threshold_' + str(
                        th) + '.png'
                    fig3.savefig(
                        os.path.join('output', 'simulacion', 'graficas',
                                     'descarga_fluvial', 'umbral_optimo',
                                     fig_name))

                    # Calculate the cdf and pdf of the cycle duration
                    dur_cycles_sim = dur_cycles_sim.astype('m8[s]').astype(
                        np.float32)
                    ecdf_dur_sim = empirical_distributions.ecdf_histogram(
                        dur_cycles_sim)
                    epdf_dur_sim = empirical_distributions.epdf_histogram(
                        dur_cycles_sim, bins=0)
                    axes2[0].plot(epdf_dur_sim.index,
                                  epdf_dur_sim,
                                  '--',
                                  color=color_sequence[j],
                                  label=['Threshold: ' + str(threshold[j])])
                    axes2[1].plot(ecdf_dur_sim.index,
                                  ecdf_dur_sim,
                                  '--',
                                  color=color_sequence[j],
                                  label=['Threshold: ' + str(threshold[j])])
                    axes2[0].legend()
                    axes2[1].set_xlim([0, 5000000])
                    axes2[0].set_xlim([0, 5000000])

                fig_name = 'ciclos_dur_sim_' + str(cont) + '.png'
                fig2.savefig(
                    os.path.join('output', 'simulacion', 'graficas',
                                 'descarga_fluvial', 'umbral_optimo',
                                 fig_name))

                cont += 1

            # Overlay the empirical cdf / pdf of the observed sample
            data = sample_cycles['Q']
            ecdf = empirical_distributions.ecdf_histogram(data)
            epdf = empirical_distributions.epdf_histogram(data, bins=0)
            axes1[0].plot(epdf.index, epdf, 'r', lw=2)
            axes1[1].plot(ecdf.index, ecdf, 'r', lw=2)

        fig_name = 'pdf_cdf_descarga_fluvial.png'
        fig1.savefig(
            os.path.join('output', 'simulacion', 'graficas',
                         'descarga_fluvial', 'umbral_optimo', fig_name))

    #%% CLIMATE SIMULATION
    threshold = 50
    minimum_interarrival_time = pd.Timedelta('1 hour')
    minimum_cycle_length = pd.Timedelta('2 days')
    if simulation_cycles:
        # Load the parameters fitted in the analysis stage
        par_cycles = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'))
        par_calms = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_calms.npy'))
        mod_calms = np.load(
            os.path.join('output', 'analisis',
                         'mod_river_discharge_calms.npy'))
        f_mix_calms = np.load(
            os.path.join('output', 'analisis',
                         'f_mix_river_discharge_calms.npy'))

        df_dt_cycles = pd.read_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_cycles.p'))
        df_dt_calms = pd.read_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_calms.p'))
        vars_ = ['Q']

        # Figure for the empirical cdfs and pdfs
        fig2, axes1 = plt.subplots(1, 2, figsize=(20, 7))

        cont = 0
        n_iter = 0  # renamed from 'iter' to avoid shadowing the builtin
        while cont < no_sim:
            df_sim = simulacion.simulacion(anocomienzo,
                                           duracion,
                                           par_cycles,
                                           mod_cycles,
                                           no_norm_cycles,
                                           f_mix_cycles,
                                           fun_cycles,
                                           vars_,
                                           sample_cycles,
                                           df_dt_cycles, [0, 0, 0, 0, 0],
                                           semilla=int(
                                               np.random.rand(1) * 1e6))

            n_iter += 1

            # Discard the simulated series when any value exceeds 1.25 times
            # the maximum observed discharge
            if np.max(df_sim).values <= np.max(sample_cycles['Q']) * 1.25:
                df_sim = df_sim.resample('1H').interpolate()

                # Extract cycles from data for different thresholds to fix
                # the duration
                if cont == 0:
                    # Convert observed durations to float seconds (only once)
                    dur_cycles = dur_cycles.astype('m8[s]').astype(np.float32)
                # Calculate cycles
                cycles, calm_periods, info = extremal.extreme_events(
                    df_sim, 'Q', threshold, minimum_interarrival_time,
                    minimum_cycle_length, interpolation, interpolation_method,
                    interpolation_freq, truncate, extra_info)

                # # Represent cycles
                # fig3 = plt.figure(figsize=(20, 20))
                # ax = plt.axes()
                # ax.plot(df_sim)
                # ax.axhline(threshold, color='lightgray')
                # ax.grid()
                #
                # for cycle in cycles:
                #     ax.plot(cycle, 'g', marker='.', markersize=5)
                #     ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10)
                #     ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10)
                # ax.set_xlim([datetime.date(2018, 1, 1), datetime.date(2021, 1, 1)])
                # ax.set_ylim([0, 600])
                # fig3.savefig(os.path.join('output', 'simulacion', 'graficas', 'descarga_fluvial',
                #                           'ciclos_cadiz_simulado_' + str(cont).zfill(4) + '.png'))

                # Start to construct the time series, pre-filled with a
                # baseline discharge of 25
                indices = pd.date_range(start='2018', end='2100', freq='1H')
                df_simulate = pd.DataFrame(np.zeros((len(indices), 1)) + 25,
                                           dtype=float,
                                           index=indices,
                                           columns=['Q'])

                # The start is in wet cycles
                cont_wet_cicles = 0
                cont_df_events = 1
                # BUG FIX: leading-zero integer literals (01) are a
                # SyntaxError in Python 3
                t_ini = datetime.datetime(2018, 1, 1)
                t_end = datetime.datetime(2018, 1, 1)
                while t_end < datetime.datetime(2090, 1, 1):
                    if cont_wet_cicles != 0:
                        # Jump ahead by the simulated gap between wet cycles
                        t_ini = t_end + simulated_t_wet_cycles_days[
                            cont_wet_cicles]
                        year = t_ini.year
                    else:
                        year = 2018

                    # Select the number of events during wet cycle
                    n_events = simulated_number_events[cont_wet_cicles] - 1
                    cont_wet_cicles += 1

                    if n_events != 0:

                        # for j in range(0, n_events):
                        cont_df_events_in_wet_cycles = 0
                        while cont_df_events_in_wet_cycles <= n_events:
                            if cont_df_events_in_wet_cycles != 0:
                                # Time between events
                                year = year + 1

                            # Select the event
                            cycle = cycles[cont_df_events]

                            if np.max(cycle) >= 150:
                                # Large events are placed in winter months
                                # (Jan-Mar or Oct-Dec) at a random date
                                month1 = [
                                    random.randint(1, 3),
                                    random.randint(10, 12)
                                ]
                                rand_pos = random.randint(0, 1)
                                month = month1[rand_pos]
                                day = random.randint(1, 28)
                                hour = random.randint(0, 23)
                            else:
                                # Smaller events at any random date
                                month = random.randint(1, 12)
                                day = random.randint(1, 28)
                                hour = random.randint(0, 23)
                            t_ini = datetime.datetime(year, month, day, hour)
                            pos_ini = np.where(
                                df_simulate.index == t_ini)[0][0]
                            pos_end = pos_ini + cycle.shape[0]

                            # Insert cycle
                            df_simulate.iloc[pos_ini:pos_end, 0] = cycle.values
                            t_end = df_simulate.index[pos_end]
                            # BUG FIX: Timestamp.to_datetime() was removed in
                            # modern pandas; .year is available directly
                            year = df_simulate.index[pos_end].year
                            cont_df_events += 1
                            cont_df_events_in_wet_cycles += 1

                    else:
                        t_end = t_ini

                # Simulation of calm periods
                df_sim_calms = simulacion.simulacion(
                    anocomienzo,
                    85,
                    par_calms,
                    mod_calms,
                    no_norm_calms,
                    f_mix_calms,
                    fun_calms,
                    vars_,
                    sample_calms,
                    df_dt_calms, [0, 0, 0, 0, 0],
                    semilla=int(np.random.rand(1) * 1e6))

                # Remove negative values
                df_sim_calms[df_sim_calms < 0] = np.random.randint(1, 5)

                # Combine both dataframes with cycles and calms
                pos_cycles = df_simulate >= 50
                df_river_discharge = df_sim_calms
                df_river_discharge[pos_cycles] = df_simulate

                # Hourly interpolation
                df_river_discharge = df_river_discharge.resample(
                    'H').interpolate()

                # Representation of results
                fig1 = plt.figure(figsize=(20, 10))
                ax = plt.axes()
                ax.plot(river_discharge)
                ax.plot(df_river_discharge)
                # BUG FIX: legend labels must be passed as one list; passing
                # two strings made matplotlib treat 'Hindcast' as an
                # iterable of single-character labels
                ax.legend(['Hindcast', 'Forecast'])
                ax.grid()
                ax.set_ylim([-5, 500])
                fig1.savefig(
                    os.path.join(
                        'output', 'simulacion', 'graficas', 'descarga_fluvial',
                        'descarga_fluvial_cadiz_simulado_' +
                        str(cont).zfill(4) + '.png'))

                # Cdf Pdf
                data = df_river_discharge['Q']
                ecdf = empirical_distributions.ecdf_histogram(data)
                epdf = empirical_distributions.epdf_histogram(data, bins=0)
                axes1[0].plot(epdf.index, epdf, '--', color='0.75')
                axes1[1].plot(ecdf.index, ecdf, '--', color='0.75')

                # Save the simulated series to disk
                # BUG FIX: sep=n(b'\t') raised NameError ('n' is undefined);
                # a tab separator was clearly intended
                df_river_discharge.to_csv(os.path.join(
                    'output', 'simulacion', 'series_temporales',
                    'descarga_fluvial_500', 'descarga_fluvial_guadalete_sim_' +
                    str(cont).zfill(4) + '.txt'),
                                          sep='\t')
                cont += 1

        # Overlay the empirical cdf / pdf of the hindcast and save the figure
        data = river_discharge['Q']
        ecdf = empirical_distributions.ecdf_histogram(data)
        epdf = empirical_distributions.epdf_histogram(data, bins=0)
        axes1[0].plot(epdf.index, epdf, 'r', lw=2)
        axes1[1].plot(ecdf.index, ecdf, 'r', lw=2)
        fig_name = 'pdf_cdf_descarga_fluvial.png'
        fig2.savefig(
            os.path.join('output', 'simulacion', 'graficas',
                         'descarga_fluvial', fig_name))
Exemplo n.º 9
0
def output_missing_values(modf, info, output_path):
    """Build the 'Missing values' section of a report.

    Produces a missing-values figure and a missing-values table for the
    descriptor named in ``info``, writes both under ``output_path``, and
    returns ``(elements, title)`` where ``elements`` is a DataFrame with
    columns ``['path', 'kind', 'caption']``.
    """
    elements = []

    # Section title (overridable through info['title_section'])
    default_title = _('Missing values')

    # Mandatory report metadata
    location = info['location_metocean']
    driver = info['name_driver']
    descriptor = info['name_descriptor']
    block = info['name_block']
    section = info['name_section']

    # Optional metadata with fallbacks
    title = get_key(info, 'title_section', default_title)
    var_name = get_key(info, 'var_name_descriptor', descriptor)
    var_unit = get_key(info, 'unit_descriptor', '')
    circular = get_key(info, 'circular_descriptor', False)

    # Register the introductory tex snippet for this section
    input_tex(elements, info, output_path, section)

    # Locate the gaps in the descriptor time series
    data = extract_data(modf, descriptor_name=descriptor)
    time_step = missing_values.find_timestep(data)
    data_gaps = missing_values.find_missing_values(data, time_step)

    # --- Figure ---------------------------------------------------------
    kind = 'figure'
    caption = get_key(
        info, 'missing_values_figure_caption_section',
        _('Missing values plot') + ': {}'.format(info['title_descriptor']))

    path = get_output_name(location=location,
                           driver=driver,
                           name=descriptor,
                           block=block,
                           title=section,
                           kind=kind)

    missing_values.plot_missing_values(data=data,
                                       data_gaps=data_gaps,
                                       title='',
                                       var_name=var_name,
                                       var_unit=var_unit,
                                       fig_filename=os.path.join(
                                           output_path, path),
                                       circular=circular,
                                       label=var_name)

    elements.append([path, kind, caption])

    # --- Table ----------------------------------------------------------
    kind = 'table'
    caption = get_key(
        info, 'missing_values_table_caption_section',
        _('Missing values table') + ': {}'.format(info['title_descriptor']))

    path = get_output_name(location=location,
                           driver=driver,
                           name=descriptor,
                           block=block,
                           title=section,
                           kind=kind)

    data_gaps_report = missing_values.missing_values_report(data, data_gaps)
    table_path = os.path.join(output_path, path)
    save_table(data_gaps_report, table_path)

    # Unlike the figure entry, the table entry stores the joined full path
    elements.append([table_path, kind, caption])

    return pd.DataFrame(elements, columns=['path', 'kind', 'caption']), title