Пример #1
0
def test_save_to_msgpack():
    """Preprocess the full SIMAR series and save it with save_to_msgpack.

    The series is read, passed through ``erase_null_values`` and
    ``fill_missing_values`` (technique ``'interpolation'``, method
    ``'nearest'``), and saved only when ``find_missing_values`` reports an
    empty result afterwards. Nothing is asserted.
    """
    # Output location.
    # NOTE(review): absolute, machine-specific Windows path -- confirm that
    # this is intended before running the test on another machine.
    path = 'D:\\REPOSITORIO GIT\\protocol_project\\data\\intermediate_files'
    file_name = 'full_simar_preprocessed.msg'

    # Load the raw series
    data_file = os.path.join('simar', 'SIMAR_1052046')
    raw_series, _ = read.simar(data_file, tests.full_data_path)

    # Preprocessing: time step, null removal and gap filling
    step = missing_values.find_timestep(raw_series, n_random_values=10)
    without_nulls = missing_values.erase_null_values(raw_series, method='all')
    fill_options = dict(technique='interpolation',
                        method='nearest',
                        limit=720,
                        limit_direction='both')
    filled_series = missing_values.fill_missing_values(without_nulls, step,
                                                       **fill_options)

    # Look for gaps that survived the filling step
    remaining_gaps = missing_values.find_missing_values(filled_series, step)

    # Only a gap-free series is written to disk
    if not remaining_gaps.empty:
        return
    save_to_msgpack(filled_series, file_name, path)
Пример #2
0
def test_extreme_events_full_simar_with_interpolation():
    """Smoke-run the extreme-event helpers on the full SIMAR series.

    Events are extracted for ``Hm0`` above its 95th percentile with
    ``interpolation=True`` and then passed to the boundaries, maxima,
    duration and magnitude helpers. Nothing is asserted: the function only
    prints whether there are as many cycles as calm periods.
    """
    var_name = 'Hm0'

    # Read the full series
    simar_dir = os.path.join('..', '..', '..', '..', '..', 'data', 'simar')
    data_simar, _ = read.simar('SIMAR_1052046', path=simar_dir)

    # Event definition parameters
    threshold = np.percentile(data_simar[var_name], 95)
    min_interarrival = pd.Timedelta('3 days')
    min_length = pd.Timedelta('3 hours')

    cycles, calm_periods = extremal.extreme_events(
        data_simar, var_name, threshold, min_interarrival, min_length,
        interpolation=True)

    # The results below are not inspected; the calls are only exercised.
    _boundaries = extremal.events_boundaries(cycles)
    _maxima = extremal.events_max(cycles)
    _durations = extremal.events_duration(cycles)
    _magnitudes = extremal.events_magnitude(cycles, threshold)

    # noinspection PyTypeChecker
    same_count = len(cycles) == len(calm_periods)
    print(same_count)
Пример #3
0
def test_min_cycles_duration():
    """Check that the shortest cycle lasts exactly the minimum cycle length.

    Cycles are extracted from the full SIMAR ``Hm0`` series above its 95th
    percentile, with a minimum interarrival time of 3 days and a minimum
    cycle length of 3 hours. The minimum of the resulting durations must
    equal that minimum cycle length.
    """
    # Read data
    data_file = 'SIMAR_1052046'
    full_data_path = os.path.join('..', '..', '..', '..', '..', 'data',
                                  'simar')
    data_simar, _ = read.simar(data_file, path=full_data_path)

    # Input
    threshold = np.percentile(data_simar['Hm0'], 95)
    minimum_interarrival_time = pd.Timedelta('3 days')
    minimum_cycle_length = pd.Timedelta('3 hours')

    # Cycles calculation
    cycles, _ = extremal.extreme_events(data_simar, 'Hm0',
                                        threshold,
                                        minimum_interarrival_time,
                                        minimum_cycle_length)

    # Cycles duration
    cycles_duration = extremal.events_duration(cycles)
    min_cycles_duration = cycles_duration.min()

    # Positions of the cycles shorter than the configured minimum. They are
    # only used in the failure message, to make offending events easy to find.
    wrong_cycles = [
        position for position, duration in enumerate(cycles_duration)
        if duration < minimum_cycle_length
    ]

    # Check if the min duration is equal to the set duration threshold
    assert min_cycles_duration == minimum_cycle_length, (
        'minimum cycle duration is {}, expected {}; '
        'positions of shorter cycles: {}'.format(min_cycles_duration,
                                                 minimum_cycle_length,
                                                 wrong_cycles))
Пример #4
0
def test_fill_missing_values():
    """Check the values written by fill_missing_values on the gap sample.

    The short-gap SIMAR sample is filled with technique ``'interpolation'``
    and method ``'nearest'`` (limit 24, both directions), and four filled
    cells are compared against reference values with a tolerance of 0.01.
    """
    # Load the sample series
    # noinspection PyTypeChecker
    data_simar, _ = read.simar('SIMAR_1052046_short_gap',
                               os.path.join(tests.sample_data_path, 'simar'))

    # Time step of the series
    step = missing_values.find_timestep(data_simar)

    # Fill the gaps
    fill_options = dict(technique='interpolation',
                        method='nearest',
                        limit=24,
                        limit_direction='both')
    data_fill = missing_values.fill_missing_values(data_simar, step,
                                                   **fill_options)

    # (timestamp, column, expected value) for every checked cell
    tolerance = 0.01
    expected_cells = (
        ('1958-01-04 08', 'Hm0', 2.1),
        ('1958-01-04 12', 'Tp', 10.5),
        ('1958-01-04 14', 'Tp', 10.6),
        ('1960-12-31 19', 'Hm0', 0.6),
    )
    for timestamp, column, value in expected_cells:
        assert data_fill.loc[timestamp,
                             column] == pytest.approx(value, tolerance)
Пример #5
0
def test_missing_values_report():
    """Run the gap report, its file export and the gap plot on the sample.

    Uses the short-gap SIMAR sample. The report is passed to
    ``missing_values_report_to_file`` with the name ``gaps_report.csv`` and
    the gaps are plotted for ``Hm0``. Nothing is asserted.
    """
    # Load the sample series
    # noinspection PyTypeChecker
    data_simar, _ = read.simar('SIMAR_1052046_short_gap',
                               os.path.join(tests.sample_data_path, 'simar'))

    # Time step and gaps of the series
    step = missing_values.find_timestep(data_simar)
    gaps = missing_values.find_missing_values(data_simar, step)

    # Build the report and hand it to the file export
    report = missing_values.missing_values_report(data_simar, gaps)
    report_name = 'gaps_report.csv'
    report_dir = os.path.join('.', '..', '..', 'report', 'tests', 'output',
                              'tables')
    missing_values.missing_values_report_to_file(report, report_name,
                                                 report_dir)

    # Plot the gaps over the Hm0 series
    plot_options = dict(data=data_simar,
                        data_column='Hm0',
                        data_gaps=gaps,
                        title='',
                        var_name='Hm0',
                        var_unit='m',
                        fig_filename='',
                        circular=False,
                        label='Hm0')
    missing_values.plot_missing_values(**plot_options)
Пример #6
0
def test_find_timestep():
    """Check that find_timestep returns one hour for the short sample."""
    # Read simar file
    data_path = os.path.join(tests.sample_data_path, 'simar')
    # noinspection PyTypeChecker
    data_simar, _ = read.simar('SIMAR_1052046_short', data_path)

    t_step = missing_values.find_timestep(data_simar)

    # pandas exposes the class as ``Timedelta``; ``pd.timedelta`` does not
    # exist and raised AttributeError before the comparison could run.
    assert t_step == pd.Timedelta(hours=1)
Пример #7
0
def test_erase_null_values():
    """Check that three timestamps are absent after erase_null_values.

    The short-gap SIMAR sample is processed with method ``'any'`` and the
    listed timestamps must not be present in the resulting index.
    """
    # Load the sample series
    # noinspection PyTypeChecker
    data_simar, _ = read.simar('SIMAR_1052046_short_gap',
                               os.path.join(tests.sample_data_path, 'simar'))

    # Erase null values
    null_method = 'any'
    data = missing_values.erase_null_values(data_simar, null_method)

    # Timestamps expected to be gone from the index
    erased_timestamps = ('1958-01-04 08', '1958-01-04 19', '1960-12-31 18')
    for timestamp in erased_timestamps:
        assert timestamp not in data.index
Пример #8
0
def test_find_missing_values():
    """Plot the gaps found by find_missing_values on the sample series.

    The ``Hm0`` series is drawn and the rows selected by the ``pos_ini`` and
    ``pos_fin`` columns of the gaps table are marked with black dots.
    Nothing is asserted.
    """
    # Load the sample series
    # noinspection PyTypeChecker
    data_simar, _ = read.simar('SIMAR_1052046_short_gap',
                               os.path.join(tests.sample_data_path, 'simar'))

    # Time step and gaps of the series
    step = missing_values.find_timestep(data_simar)
    gaps = missing_values.find_missing_values(data_simar, step)

    # Representation of the gaps
    fig = plt.figure()
    ax = fig.gca()

    ax.plot(data_simar.loc[:, 'Hm0'])
    # Mark the rows referenced by each gap-boundary column
    for boundary_column in ('pos_ini', 'pos_fin'):
        boundary_rows = gaps.loc[:, boundary_column]
        ax.plot(data_simar.loc[boundary_rows, 'Hm0'], 'k.', markersize=10)

    fig.show()
Пример #9
0
def test_gev_fit_to_annual_maxima_confidence_bands():
    """Fit the Hm0 annual maxima with ``fit_type='gev'`` and plot the result.

    The fit is complemented with a bootstrapping call (100 simulations,
    ``alpha`` 0.05) and both the CDF and the return-period plots are drawn.
    Nothing is asserted.
    """
    # Inputs
    station_file = 'SIMAR_1052046'
    percentile = 95
    n_sim_boot = 100
    alpha = 0.05  # forwarded to the bootstrapping and plotting helpers

    # Read SIMAR
    simar_dir = os.path.join('..', '..', '..', '..', '..', 'data', 'simar')
    data_simar, _ = read.simar(station_file, path=simar_dir)
    threshold = np.percentile(data_simar.loc[:, 'Hm0'], percentile)

    # Annual maxima sample
    annual_maxima = extremal.annual_maxima_calculation(data_simar['Hm0'])

    # Empirical distribution of the annual maxima and its return periods
    ecdf_am = empirical_distributions.ecdf_histogram(annual_maxima)
    ecdf_am_rp = extremal.return_period_curve(1, ecdf_am)

    # Fit the annual maxima
    fit_result = extremal.extremal_distribution_fit(
        data=data_simar,
        var_name='Hm0',
        sample=annual_maxima,
        threshold=threshold,
        fit_type='gev',
        x_min=0.90 * min(annual_maxima),
        x_max=1.5 * max(annual_maxima),
        n_points=1000,
        cumulative=True)
    param_orig, x_gev, y_gev, y_gev_rp = fit_result

    # Bootstrapping for the confidence bands
    boot_extreme = extremal.extremal_distribution_fit_bootstrapping(
        sample=annual_maxima,
        n_sim_boot=n_sim_boot,
        data=data_simar,
        var_name='Hm0',
        threshold=threshold,
        param_orig=param_orig,
        fit_type='gev',
        x_min=0.90 * min(annual_maxima),
        x_max=1.5 * max(annual_maxima),
        alpha=alpha)

    # Representation: both plots share the same labelling options
    plot_options = dict(title='',
                        var_name='Hm0',
                        var_unit='m',
                        fig_filename='',
                        circular=False,
                        extremal_label='GEV Fit',
                        empirical_label='GEV ECDF')

    extremal.plot_extremal_cdf(x_gev, y_gev, ecdf_am, n_sim_boot,
                               boot_extreme, alpha, **plot_options)

    extremal.plot_extremal_return_period(x_gev, y_gev_rp, ecdf_am_rp,
                                         n_sim_boot, boot_extreme, alpha,
                                         **plot_options)
Пример #10
0
def test_gpd_fit_to_pot_confidence_bands():
    """Fit the Hm0 peaks over threshold with ``fit_type='gpd'`` and plot.

    Events are extracted above the 95th percentile of ``Hm0``, their maxima
    are fitted, a bootstrapping call (100 simulations, ``alpha`` 0.05) is
    made, and both the CDF and the return-period plots are drawn. Nothing is
    asserted.
    """
    # Inputs
    station_file = 'SIMAR_1052046'
    percentile = 95
    min_interarrival = pd.Timedelta('3 days')
    min_length = pd.Timedelta('3 hours')
    interpolation = True
    interpolation_method = 'linear'
    interpolation_freq = '1min'
    truncate = False
    extra_info = False
    n_sim_boot = 100
    alpha = 0.05  # forwarded to the bootstrapping and plotting helpers

    # Read SIMAR
    simar_dir = os.path.join('..', '..', '..', '..', '..', 'data', 'simar')
    data_simar, _ = read.simar(station_file, path=simar_dir)
    threshold = np.percentile(data_simar.loc[:, 'Hm0'], percentile)

    # Storm cycles calculation (calm periods are not used here)
    cycles, _ = extremal.extreme_events(data_simar, 'Hm0', threshold,
                                        min_interarrival, min_length,
                                        interpolation, interpolation_method,
                                        interpolation_freq, truncate,
                                        extra_info)

    # Peaks over threshold
    peaks = extremal.events_max(cycles)

    # Empirical distribution of the peaks and its return periods
    ecdf_pot = empirical_distributions.ecdf_histogram(peaks)
    n_years = len(data_simar['Hm0'].index.year.unique())
    n_peaks_year = len(peaks) / n_years
    ecdf_pot_rp = extremal.return_period_curve(n_peaks_year, ecdf_pot)

    # Fit the peaks over threshold
    fit_result = extremal.extremal_distribution_fit(
        data=data_simar,
        var_name='Hm0',
        sample=peaks,
        threshold=threshold,
        fit_type='gpd',
        x_min=0.90 * min(peaks),
        x_max=1.5 * max(peaks),
        n_points=1000,
        cumulative=True)
    param_orig, x_gpd, y_gpd, y_gpd_rp = fit_result

    # Bootstrapping for the confidence bands
    boot_extreme = extremal.extremal_distribution_fit_bootstrapping(
        sample=peaks,
        n_sim_boot=n_sim_boot,
        data=data_simar,
        var_name='Hm0',
        threshold=threshold,
        param_orig=param_orig,
        fit_type='gpd',
        x_min=0.90 * min(peaks),
        x_max=1.5 * max(peaks),
        alpha=alpha)

    # Representation: both plots share the same labelling options
    plot_options = dict(title='',
                        var_name='Hm0',
                        var_unit='m',
                        fig_filename='',
                        circular=False,
                        extremal_label='GPD Fit',
                        empirical_label='POT ECDF')

    extremal.plot_extremal_cdf(x_gpd, y_gpd, ecdf_pot, n_sim_boot,
                               boot_extreme, alpha, **plot_options)

    extremal.plot_extremal_return_period(x_gpd, y_gpd_rp, ecdf_pot_rp,
                                         n_sim_boot, boot_extreme, alpha,
                                         **plot_options)
Пример #11
0
def test_poisson_pareto_fit_to_pot_and_gev_fit_to_annual_maxima():
    """Plot a ``'gev'`` fit of annual maxima next to a ``'poisson'`` POT fit.

    Both fits are computed for ``Hm0`` of the full SIMAR series and drawn,
    together with the annual-maxima empirical distribution, as CDF curves
    and as return-period curves. Nothing is asserted.
    """
    # Inputs
    station_file = 'SIMAR_1052046'
    percentile = 95
    min_interarrival = pd.Timedelta('3 days')
    min_length = pd.Timedelta('3 hours')
    interpolation = True
    interpolation_method = 'linear'
    interpolation_freq = '1min'
    truncate = False
    extra_info = False

    # Read SIMAR
    simar_dir = os.path.join('..', '..', '..', '..', '..', 'data', 'simar')
    data_simar, _ = read.simar(station_file, path=simar_dir)
    threshold = np.percentile(data_simar.loc[:, 'Hm0'], percentile)

    # Storm cycles calculation (calm periods are not used here)
    cycles, _ = extremal.extreme_events(data_simar, 'Hm0', threshold,
                                        min_interarrival, min_length,
                                        interpolation, interpolation_method,
                                        interpolation_freq, truncate,
                                        extra_info)

    # Samples: peaks over threshold and annual maxima
    peaks = extremal.events_max(cycles)
    annual_maxima = extremal.annual_maxima_calculation(data_simar['Hm0'])

    # Empirical distribution of the peaks; these results are not plotted
    ecdf_pot = empirical_distributions.ecdf_histogram(peaks)
    n_years = len(data_simar['Hm0'].index.year.unique())
    n_peaks_year = len(peaks) / n_years
    _ecdf_pot_rp = extremal.return_period_curve(n_peaks_year, ecdf_pot)

    # Empirical distribution of the annual maxima
    ecdf_am = empirical_distributions.ecdf_histogram(annual_maxima)
    ecdf_am_rp = extremal.return_period_curve(1, ecdf_am)

    # Fit of the annual maxima (fitted parameters are not used)
    _, x_gev, y_gev, y_gev_rp = extremal.extremal_distribution_fit(
        data=data_simar,
        var_name='Hm0',
        sample=annual_maxima,
        threshold=None,
        fit_type='gev',
        x_min=0.90 * min(annual_maxima),
        x_max=1.5 * max(annual_maxima),
        n_points=1000,
        cumulative=True)

    # Fit of the peaks over threshold (fitted parameters are not used)
    _, x_pp, y_pp, y_pp_rp = extremal.extremal_distribution_fit(
        data=data_simar,
        var_name='Hm0',
        sample=peaks,
        threshold=threshold,
        fit_type='poisson',
        x_min=0.90 * min(peaks),
        x_max=1.5 * max(peaks),
        n_points=1000,
        cumulative=True)

    # First figure: cumulative distribution functions
    plt.figure()
    cdf_axes = plt.axes()
    cdf_axes.plot(ecdf_am.index, ecdf_am, '.k', label='Annual maxima ECDF')
    cdf_axes.plot(x_gev, y_gev, 'k', label='GEV fit')
    cdf_axes.plot(x_pp, y_pp, label='Poisson-Pareto fit')
    plt.xlabel('Hm0 (m)')
    plt.ylabel('CDF')
    cdf_axes.legend()
    plt.grid()
    plt.show()

    # Second figure: return periods on a logarithmic x axis
    plt.figure()
    rp_axes = plt.axes()
    rp_axes.semilogx(ecdf_am_rp,
                     ecdf_am_rp.index,
                     '.k',
                     label='Annual maxima ECDF')
    rp_axes.semilogx(y_gev_rp, x_gev, 'k', label='GEV fit')
    rp_axes.semilogx(y_pp_rp, x_pp, label='Poisson-Pareto fit')
    # NOTE(review): a lower limit of 0 is requested on a logarithmic axis --
    # confirm that this is intended.
    plt.xlim(0, 500)
    plt.xlabel('Return Period (years)')
    plt.ylabel('Hm0 (m)')
    rp_axes.legend()
    plt.grid()
    plt.show()
Пример #12
0
def read_full_simar(data_file=full_data_file, data_path=full_data_path):
    """Return the data read by ``read.simar``, dropping the code it returns.

    :param data_file: file name forwarded to ``read.simar``; defaults to the
        module-level ``full_data_file``.
    :param data_path: path forwarded to ``read.simar``; defaults to the
        module-level ``full_data_path``.
    :return: first element of the pair returned by ``read.simar``.
    """
    # noinspection PyTypeChecker
    simar_data, _ = read.simar(data_file, data_path)
    return simar_data
Пример #13
0
def read_sample_simar(data_file=sample_data_file, data_path=sample_data_path):
    """Return the data read by ``read.simar``, dropping the code it returns.

    :param data_file: file name forwarded to ``read.simar``; defaults to the
        module-level ``sample_data_file``.
    :param data_path: path forwarded to ``read.simar``; defaults to the
        module-level ``sample_data_path``.
    :return: first element of the pair returned by ``read.simar``.
    """
    # noinspection PyTypeChecker
    simar_data, _ = read.simar(data_file, data_path)
    return simar_data
Пример #14
0
def test_simar_data():
    """Check the row count and one ``Tp`` value of the sample SIMAR data."""
    data_simar, _ = read.simar(tests.sample_data_file, tests.sample_data_path)

    assert data_simar.shape[0] == 216
    # ``DataFrame.ix`` was removed in pandas 1.0. The row is addressed by
    # position here, which is what ``.ix`` did with an integer key when the
    # index is not integer-typed.
    # NOTE(review): assumes the index of the SIMAR data is not integer-typed
    # -- confirm against read.simar.
    assert data_simar['Tp'].iloc[153] == 11.2
Пример #15
0
def test_simar_code():
    """Check the code that ``read.simar`` returns for the sample file."""
    expected_code = '1052046'

    # Only the second element of the returned pair is of interest here
    _, simar_code = read.simar(tests.sample_data_file,
                               tests.sample_data_path)

    assert simar_code == expected_code