def test_2d_fit(self):
    """
    2-d fit with a Weibull and a Lognormal distribution.
    """
    prng = np.random.RandomState(42)

    # Draw 1000 samples from a Weibull distribution with shape=1.5 and scale=3,
    # which represents significant wave height.
    sample_1 = prng.weibull(1.5, 1000) * 3

    # Let the second sample, which represents spectral peak period, increase
    # with significant wave height and follow a Lognormal distribution with
    # mean=2 and sigma=0.2.
    sample_2 = [0.1 + 1.5 * np.exp(0.2 * point) + prng.lognormal(2, 0.2)
                for point in sample_1]

    # Describe the distribution that should be fitted to the sample.
    dist_description_0 = {'name': 'Weibull_3p',
                          'dependency': (None, None, None),
                          'width_of_intervals': 2}
    dist_description_1 = {'name': 'Lognormal',
                          'dependency': (None, None, 0),
                          'functions': (None, None, 'exp3')}

    # Compute the fit.
    my_fit = Fit((sample_1, sample_2),
                 (dist_description_0, dist_description_1))
    dist0 = my_fit.mul_var_dist.distributions[0]
    dist1 = my_fit.mul_var_dist.distributions[1]
    self.assertAlmostEqual(dist0.shape(0), 1.4165147571863412, places=5)
    self.assertAlmostEqual(dist0.scale(0), 2.833833521811032, places=5)
    self.assertAlmostEqual(dist0.loc(0), 0.07055663251419833, places=5)
    self.assertAlmostEqual(dist1.shape(0), 0.17742685807554776, places=5)
    # The fitted scale is the dependence function
    # scale(x) = 7.1536437634240135 + 2.075539206642004 * exp(0.1515051024957754 * x)
    # and is therefore not checked with assertAlmostEqual here.
    self.assertIsNone(dist1.loc)

    # Now use a 2-parameter Weibull distribution instead of the 3-parameter
    # distribution.
    dist_description_0 = {'name': 'Weibull_2p',
                          'dependency': (None, None, None),
                          'width_of_intervals': 2}
    dist_description_1 = {'name': 'Lognormal',
                          'dependency': (None, None, 0),
                          'functions': (None, None, 'exp3')}
    my_fit = Fit((sample_1, sample_2),
                 (dist_description_0, dist_description_1))
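# A minimal sketch (not part of the original test) of the assumed 'exp3'
# convention, exp3(x) = a + b * exp(c * x); the commented-out assertion above
# follows this form.
def exp3_by_hand(x, a, b, c):
    # Evaluate a + b * exp(c * x) with fitted coefficients, e.g.
    # a = dist1.scale.a, b = dist1.scale.b, c = dist1.scale.c; the result
    # should then match dist1.scale(x).
    return a + b * np.exp(c * x)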
def test_min_number_datapoints_for_fit(self):
    """
    Tests if the minimum number of datapoints required for a fit works.
    """
    sample_hs, sample_tz, label_hs, label_tz = read_benchmark_dataset()

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_hs = {'name': 'Weibull_Exp',
                           'dependency': (None, None, None, None),  # Shape, Location, Scale, Shape2
                           'width_of_intervals': 0.5}
    dist_description_tz = {'name': 'Lognormal_SigmaMu',
                           'dependency': (0, None, 0),  # Shape, Location, Scale
                           'functions': ('exp3', None, 'lnsquare2'),  # Shape, Location, Scale
                           'min_datapoints_for_fit': 10}

    # Fit the model to the data.
    fit = Fit((sample_hs, sample_tz),
              (dist_description_hs, dist_description_tz))

    # Check whether the logarithmic square fit worked correctly.
    dist1 = fit.mul_var_dist.distributions[1]
    a_min_10 = dist1.scale.a

    # Now require more datapoints for a fit.
    dist_description_tz = {'name': 'Lognormal_SigmaMu',
                           'dependency': (0, None, 0),  # Shape, Location, Scale
                           'functions': ('exp3', None, 'lnsquare2'),  # Shape, Location, Scale
                           'min_datapoints_for_fit': 500}

    # Fit the model to the data.
    fit = Fit((sample_hs, sample_tz),
              (dist_description_hs, dist_description_tz))

    # Check whether the logarithmic square fit worked correctly.
    dist1 = fit.mul_var_dist.distributions[1]
    a_min_500 = dist1.scale.a

    # Because fewer intervals were used in the second case, we should get
    # different coefficients for the dependence function.
    self.assertNotEqual(a_min_10, a_min_500)
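# Illustrative sketch (an assumption about how intervals are formed, not
# viroconcom API): counting the datapoints per interval of the conditioning
# variable shows which intervals pass a given 'min_datapoints_for_fit'
# threshold, and thus why a larger threshold leads to fewer usable intervals.
def datapoints_per_interval(sample, width):
    # Bin the sample into intervals of the given width and return the number
    # of datapoints that fall into each interval.
    edges = np.arange(0, np.max(sample) + width, width)
    counts, _ = np.histogram(sample, bins=edges)
    return counts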
def test_weighting_of_dependence_function(self):
    """
    Tests if using weights when the dependence function is fitted works
    correctly.
    """
    sample_v, sample_hs, label_v, label_hs = read_benchmark_dataset(
        path='tests/testfiles/1year_dataset_D.txt')

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_v = {'name': 'Weibull_Exp',
                          'dependency': (None, None, None, None),
                          'width_of_intervals': 2}
    dist_description_hs = {'name': 'Weibull_Exp',
                           'fixed_parameters': (None, None, None, 5),  # shape, location, scale, shape2
                           'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                           'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                           'min_datapoints_for_fit': 20,
                           'do_use_weights_for_dependence_function': False}

    # Fit the model to the data.
    fit = Fit((sample_v, sample_hs),
              (dist_description_v, dist_description_hs))
    dist1_no_weights = fit.mul_var_dist.distributions[1]

    # Now perform a fit with weights.
    dist_description_hs = {'name': 'Weibull_Exp',
                           'fixed_parameters': (None, None, None, 5),  # shape, location, scale, shape2
                           'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                           'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                           'min_datapoints_for_fit': 20,
                           'do_use_weights_for_dependence_function': True}

    # Fit the model to the data.
    fit = Fit((sample_v, sample_hs),
              (dist_description_v, dist_description_hs))
    dist1_with_weights = fit.mul_var_dist.distributions[1]

    # Make sure the two fitted dependence functions are different.
    d = np.abs(dist1_with_weights.scale(0) - dist1_no_weights.scale(0)) / \
        np.abs(dist1_no_weights.scale(0))
    self.assertGreater(d, 0.01)

    # Make sure they are not too different.
    d = np.abs(dist1_with_weights.scale(20) - dist1_no_weights.scale(20)) / \
        np.abs(dist1_no_weights.scale(20))
    self.assertLess(d, 0.5)
def test_fit_lnsquare2(self):
    """
    Tests a 2D fit that includes a logarithmic square ('lnsquare2')
    dependence function.
    """
    sample_hs, sample_tz, label_hs, label_tz = read_benchmark_dataset()

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_hs = {'name': 'Weibull_Exp',
                           'dependency': (None, None, None, None),  # Shape, Location, Scale, Shape2
                           'width_of_intervals': 0.5}
    dist_description_tz = {'name': 'Lognormal_SigmaMu',
                           'dependency': (0, None, 0),  # Shape, Location, Scale
                           'functions': ('exp3', None, 'lnsquare2')}  # Shape, Location, Scale

    # Fit the model to the data.
    fit = Fit((sample_hs, sample_tz),
              (dist_description_hs, dist_description_tz))

    # Check whether the logarithmic square fit worked correctly.
    dist1 = fit.mul_var_dist.distributions[1]
    self.assertGreater(dist1.scale.a, 1)  # Should be about 1-5.
    self.assertLess(dist1.scale.a, 5)  # Should be about 1-5.
    self.assertGreater(dist1.scale.b, 2)  # Should be about 2-10.
    self.assertLess(dist1.scale.b, 10)  # Should be about 2-10.
    self.assertGreater(dist1.scale(0), 0.1)
    self.assertLess(dist1.scale(0), 10)
    self.assertEqual(dist1.scale.func_name, 'lnsquare2')
def test_fit_asymdecrease3(self):
    """
    Tests a 2D fit that includes an 'asymdecrease3' dependence function.
    """
    sample_hs, sample_tz, label_hs, label_tz = read_benchmark_dataset()

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_hs = {'name': 'Weibull_Exp',
                           'dependency': (None, None, None, None),  # Shape, Location, Scale, Shape2
                           'width_of_intervals': 0.5}
    dist_description_tz = {'name': 'Lognormal_SigmaMu',
                           'dependency': (0, None, 0),  # Shape, Location, Scale
                           'functions': ('asymdecrease3', None, 'lnsquare2')}  # Shape, Location, Scale

    # Fit the model to the data.
    fit = Fit((sample_hs, sample_tz),
              (dist_description_hs, dist_description_tz))

    # Check whether the asymptotic decrease fit worked correctly.
    dist1 = fit.mul_var_dist.distributions[1]
    self.assertAlmostEqual(dist1.shape.a, 0, delta=0.1)  # Should be about 0.
    self.assertAlmostEqual(dist1.shape.b, 0.35, delta=0.4)  # Should be about 0.35.
    self.assertAlmostEqual(np.abs(dist1.shape.c), 0.45, delta=0.2)  # Should be about 0.45.
    self.assertAlmostEqual(dist1.shape(0), 0.35, delta=0.2)  # Should be about 0.35.
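# Note on the expected values above (assuming viroconcom defines
# asymdecrease3(x) = a + b / (1 + c * x)): at x = 0 the function reduces to
# a + b, which is consistent with the test expecting dist1.shape(0) to be
# about dist1.shape.a + dist1.shape.b = 0 + 0.35.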
def test_multi_processing(self):
    """
    2-d fit with multiprocessing (specified by setting a value for timeout).
    """
    # Define a sample and a fit.
    prng = np.random.RandomState(42)
    sample_1 = prng.weibull(1.5, 1000) * 3
    sample_2 = [0.1 + 1.5 * np.exp(0.2 * point) + prng.lognormal(2, 0.2)
                for point in sample_1]
    dist_description_0 = {'name': 'Weibull',
                          'dependency': (None, None, None),
                          'width_of_intervals': 2}
    dist_description_1 = {'name': 'Lognormal',
                          'dependency': (None, None, 0),
                          'functions': (None, None, 'exp3')}

    # Compute the fit.
    my_fit = Fit((sample_1, sample_2),
                 (dist_description_0, dist_description_1),
                 timeout=10)
def test_2d_benchmark_case(self):
    """
    Reproduces the baseline results presented in doi: 10.1115/OMAE2019-96523.
    """
    sample_hs, sample_tz, label_hs, label_tz = read_benchmark_dataset(
        path='tests/testfiles/allyears_dataset_A.txt')

    # Describe the distribution that should be fitted to the sample.
    dist_description_0 = {'name': 'Weibull_3p',
                          'dependency': (None, None, None),
                          'width_of_intervals': 0.5}
    dist_description_1 = {'name': 'Lognormal_SigmaMu',
                          'dependency': (0, None, 0),
                          'functions': ('exp3', None, 'power3')}  # Shape, location, scale.

    # Compute the fit.
    my_fit = Fit((sample_hs, sample_tz),
                 (dist_description_0, dist_description_1))

    # Evaluate the fitted parameters.
    dist0 = my_fit.mul_var_dist.distributions[0]
    dist1 = my_fit.mul_var_dist.distributions[1]
    self.assertAlmostEqual(dist0.shape(0), 1.48, delta=0.02)
    self.assertAlmostEqual(dist0.scale(0), 0.944, delta=0.01)
    self.assertAlmostEqual(dist0.loc(0), 0.0981, delta=0.001)
    self.assertAlmostEqual(dist1.shape.a, 0, delta=0.001)
    self.assertAlmostEqual(dist1.shape.b, 0.308, delta=0.002)
    self.assertAlmostEqual(dist1.shape.c, -0.250, delta=0.002)
    self.assertAlmostEqual(dist1.scale.a, 1.47, delta=0.02)
    self.assertAlmostEqual(dist1.scale.b, 0.214, delta=0.002)
    self.assertAlmostEqual(dist1.scale.c, 0.641, delta=0.002)
    self.assertAlmostEqual(dist1.scale(0), 4.3, delta=0.1)
    self.assertAlmostEqual(dist1.scale(2), 6, delta=0.1)
    self.assertAlmostEqual(dist1.scale(5), 8, delta=0.1)
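# Note on the expected values above (an assumption about the viroconcom
# convention, not stated in the test): for 'Lognormal_SigmaMu' the 'power3'
# function mu(x) = a + b * x**c is fitted to the parameter mu, while
# dist1.scale(x) evaluates exp(mu(x)). That is consistent with dist1.scale.a
# being about 1.47 while dist1.scale(0) is about exp(1.47) = 4.3.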
def test_2d_exponentiated_wbl_fit(self):
    """
    Tests if a 2D fit that includes an exponentiated Weibull distribution
    works.
    """
    prng = np.random.RandomState(42)

    # Draw 1000 samples from a Weibull distribution with shape=1.5 and scale=3,
    # which represents significant wave height.
    sample_hs = prng.weibull(1.5, 1000) * 3

    # Let the second sample, which represents zero-upcrossing period, increase
    # with significant wave height and follow a Lognormal distribution with
    # mean=2 and sigma=0.2.
    sample_tz = [0.1 + 1.5 * np.exp(0.2 * point) + prng.lognormal(2, 0.2)
                 for point in sample_hs]

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_hs = {'name': 'Weibull_Exp',
                           'dependency': (None, None, None, None),  # Shape, Location, Scale, Shape2
                           'width_of_intervals': 0.5}
    dist_description_tz = {'name': 'Lognormal_SigmaMu',
                           'dependency': (0, None, 0),  # Shape, Location, Scale
                           'functions': ('exp3', None, 'power3')}  # Shape, Location, Scale

    # Fit the model to the data, first test a 1D fit.
    fit = Fit(sample_hs, dist_description_hs)

    # Now perform the 2D fit.
    fit = Fit((sample_hs, sample_tz),
              (dist_description_hs, dist_description_tz))

    dist0 = fit.mul_var_dist.distributions[0]
    self.assertGreater(dist0.shape(0), 1)  # Should be about 1.5.
    self.assertLess(dist0.shape(0), 2)
    self.assertIsNone(dist0.loc(0))  # Has no location parameter, should be None.
    self.assertGreater(dist0.scale(0), 2)  # Should be about 3.
    self.assertLess(dist0.scale(0), 4)
    self.assertGreater(dist0.shape2(0), 0.5)  # Should be about 1.
    self.assertLess(dist0.shape2(0), 2)
def test_omae2020_wind_wave_model(self):
    """
    Tests fitting the wind-wave model that was used in the publication
    'Global hierarchical models for wind and wave contours' on dataset D.
    """
    sample_v, sample_hs, label_v, label_hs = read_benchmark_dataset(
        path='tests/testfiles/1year_dataset_D.txt')

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_v = {'name': 'Weibull_Exp',
                          'dependency': (None, None, None, None),
                          'width_of_intervals': 2}
    dist_description_hs = {'name': 'Weibull_Exp',
                           'fixed_parameters': (None, None, None, 5),  # shape, location, scale, shape2
                           'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                           'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                           'min_datapoints_for_fit': 20}

    # Fit the model to the data.
    fit = Fit((sample_v, sample_hs),
              (dist_description_v, dist_description_hs))

    dist0 = fit.mul_var_dist.distributions[0]
    self.assertAlmostEqual(dist0.shape(0), 2.42, delta=1)
    self.assertAlmostEqual(dist0.scale(0), 10.0, delta=2)
    self.assertAlmostEqual(dist0.shape2(0), 0.761, delta=0.5)

    dist1 = fit.mul_var_dist.distributions[1]
    self.assertEqual(dist1.shape2(0), 5)
    inspection_data1 = fit.multiple_fit_inspection_data[1]
    self.assertEqual(inspection_data1.shape2_value[0], 5)
    self.assertAlmostEqual(inspection_data1.shape_value[0], 0.8, delta=0.5)  # Interval centered at 1.
    self.assertAlmostEqual(inspection_data1.shape_value[4], 1.5, delta=0.5)  # Interval centered at 9.
    self.assertAlmostEqual(inspection_data1.shape_value[9], 2.5, delta=1)  # Interval centered at 19.
    self.assertAlmostEqual(dist1.shape(0), 0.8, delta=0.3)
    self.assertAlmostEqual(dist1.shape(10), 1.6, delta=0.5)
    self.assertAlmostEqual(dist1.shape(20), 2.3, delta=0.7)
    self.assertAlmostEqual(dist1.shape.a, 0.582, delta=0.5)
    self.assertAlmostEqual(dist1.shape.b, 1.90, delta=1)
    self.assertAlmostEqual(dist1.shape.c, 0.248, delta=0.5)
    self.assertAlmostEqual(dist1.shape.d, 8.49, delta=5)
    self.assertAlmostEqual(inspection_data1.scale_value[0], 0.15, delta=0.2)  # Interval centered at 1.
    self.assertAlmostEqual(inspection_data1.scale_value[4], 1, delta=0.5)  # Interval centered at 9.
    self.assertAlmostEqual(inspection_data1.scale_value[9], 4, delta=1)  # Interval centered at 19.
    self.assertAlmostEqual(dist1.scale(0), 0.15, delta=0.5)
    self.assertAlmostEqual(dist1.scale(10), 1, delta=0.5)
    self.assertAlmostEqual(dist1.scale(20), 4, delta=1)
    self.assertAlmostEqual(dist1.scale.a, 0.394, delta=0.5)
    self.assertAlmostEqual(dist1.scale.b, 0.0178, delta=0.1)
    self.assertAlmostEqual(dist1.scale.c, 1.88, delta=0.8)
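# Note (assuming 'logistics4' is the 4-parameter logistic function
# a + b / (1 + exp(-c * (x - d)))): with the fitted coefficients above, the
# shape function rises from about 0.8 at v = 0 towards a + b = 2.5 for large
# wind speeds, which matches the expected values for dist1.shape(0),
# dist1.shape(10) and dist1.shape(20).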
def test_wbl_fit_with_negative_location(self):
    """
    Tests fitting a translated Weibull distribution which would result in a
    negative location parameter.
    """
    sample_hs, sample_tz, label_hs, label_tz = read_benchmark_dataset()

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_hs = {'name': 'Weibull_3p',
                           'dependency': (None, None, None)}

    # Fit the model to the data.
    fit = Fit((sample_hs, ), (dist_description_hs, ))

    # Correct values for 10 years of data can be found in
    # 10.1115/OMAE2019-96523 . Here we used 1 year of data.
    dist0 = fit.mul_var_dist.distributions[0]
    self.assertAlmostEqual(dist0.shape(0) / 10, 1.48 / 10, places=1)
    self.assertGreater(dist0.loc(0), 0.0)  # Should be 0.0981.
    self.assertLess(dist0.loc(0), 0.3)  # Should be 0.0981.
    self.assertAlmostEqual(dist0.scale(0), 0.944, places=1)

    # Shift the wave data by -2 m and fit again.
    sample_hs = sample_hs - 2

    # Negative location values will be set to zero instead and a warning will
    # be raised.
    with self.assertWarns(RuntimeWarning):
        fit = Fit((sample_hs, ), (dist_description_hs, ))
    dist0 = fit.mul_var_dist.distributions[0]
    self.assertAlmostEqual(dist0.shape(0) / 10, 1.48 / 10, places=1)
    # Should be estimated to be 0.0981 - 2 and corrected to be 0.
    self.assertEqual(dist0.loc(0), 0)
    self.assertAlmostEqual(dist0.scale(0), 0.944, places=1)
def test_draw_sample_distribution(self):
    """
    Creates an example MultivariateDistribution (Vanem2012 model), draws a
    sample from it and fits a model to that sample.
    """
    # Define dependency tuples.
    dep1 = (None, None, None)
    dep2 = (0, None, 0)

    # Define parameters.
    shape = ConstantParam(1.471)
    loc = ConstantParam(0.8888)
    scale = ConstantParam(2.776)
    par1 = (shape, loc, scale)

    shape = FunctionParam('exp3', 0.0400, 0.1748, -0.2243)
    loc = None
    scale = FunctionParam('power3', 0.1, 1.489, 0.1901)
    par2 = (shape, loc, scale)
    del shape, loc, scale

    # Create distributions.
    dist1 = WeibullDistribution(*par1)
    dist2 = LognormalDistribution(*par2)
    distributions = [dist1, dist2]
    dependencies = [dep1, dep2]

    points = 1000000
    mul_var_dist = MultivariateDistribution(distributions, dependencies)
    my_points = mul_var_dist.draw_sample(points)

    # Fit a model to the sample. Describe the distribution that should be
    # fitted to the sample.
    dist_description_0 = {'name': 'Weibull',
                          'dependency': (None, None, None),
                          'width_of_intervals': 2}
    dist_description_1 = {'name': 'Lognormal',
                          'dependency': (0, None, 0),
                          'functions': ('exp3', None, 'power3')}
    my_fit = Fit([my_points[0], my_points[1]],
                 [dist_description_0, dist_description_1])

    print(my_fit.mul_var_dist.distributions[0].shape(0))
    print(mul_var_dist.distributions[0].shape(0))
    assert np.round(my_fit.mul_var_dist.distributions[0].shape(0), 2) == \
        np.round(mul_var_dist.distributions[0].shape(0), 2)
def test_plot_windwave_fit(self):
    """
    Plots goodness of fit graphs, for the marginal distribution of X1 and
    for the dependence function of X2|X1. Uses wind and wave data.
    """
    sample_v, sample_hs, label_v, label_hs = \
        read_ecbenchmark_dataset('datasets/1year_dataset_D.txt')
    label_v = 'v (m s$^{-1}$)'

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_v = {'name': 'Weibull_Exp',
                          'dependency': (None, None, None, None),
                          'width_of_intervals': 2}
    dist_description_hs = {'name': 'Weibull_Exp',
                           'fixed_parameters': (None, None, None, 5),  # shape, location, scale, shape2
                           'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                           'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                           'min_datapoints_for_fit': 50,
                           'do_use_weights_for_dependence_function': True}

    # Fit the model to the data.
    fit = Fit((sample_v, sample_hs),
              (dist_description_v, dist_description_hs))
    dist0 = fit.mul_var_dist.distributions[0]

    fig = plt.figure(figsize=(12.5, 3.5), dpi=150)
    ax1 = fig.add_subplot(131)
    ax2 = fig.add_subplot(132)
    ax3 = fig.add_subplot(133)
    plot_marginal_fit(sample_v, dist0, fig=fig, ax=ax1, label=label_v,
                      dataset_char='D')
    plot_dependence_functions(fit=fit, fig=fig, ax1=ax2, ax2=ax3,
                              unconditonal_variable_label=label_v)
def test_plot_seastate_fit(self):
    """
    Plots goodness of fit graphs, for the marginal distribution of X1 and
    for the dependence function of X2|X1. Uses sea state data.
    """
    sample_hs, sample_tz, label_hs, label_tz = read_ecbenchmark_dataset()

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_hs = {'name': 'Weibull_Exp',
                           'dependency': (None, None, None, None),
                           'width_of_intervals': 0.5}
    dist_description_tz = {'name': 'Lognormal_SigmaMu',
                           'dependency': (0, None, 0),  # Shape, Location, Scale
                           'functions': ('asymdecrease3', None, 'lnsquare2'),  # Shape, Location, Scale
                           'min_datapoints_for_fit': 50}

    # Fit the model to the data.
    fit = Fit((sample_hs, sample_tz),
              (dist_description_hs, dist_description_tz))
    dist0 = fit.mul_var_dist.distributions[0]

    fig = plt.figure(figsize=(12.5, 3.5), dpi=150)
    ax1 = fig.add_subplot(131)
    ax2 = fig.add_subplot(132)
    ax3 = fig.add_subplot(133)
    plot_marginal_fit(sample_hs, dist0, fig=fig, ax=ax1, label='$h_s$ (m)',
                      dataset_char='A')
    plot_dependence_functions(fit=fit, fig=fig, ax1=ax2, ax2=ax3,
                              unconditonal_variable_label=label_hs)
                      'width_of_intervals': 2}
dist_description_hs = {'name': 'Weibull_Exp',
                       'fixed_parameters': (None, None, None, 5),  # shape, location, scale, shape2
                       'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                       'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                       'min_datapoints_for_fit': 50,
                       'do_use_weights_for_dependence_function': True}

# Fit the model to the dataset.
fit = Fit((v_i, hs_i), (dist_description_v, dist_description_hs))
dist0 = fit.mul_var_dist.distributions[0]
dist1 = fit.mul_var_dist.distributions[1]

# Compute the 50-yr highest density contour.
return_period = 50
ts = 1  # Sea state duration in hours.
limits = [(0, 45), (0, 25)]  # Limits of the computational domain.
deltas = [0.05, 0.05]  # Dimensions of the grid cells.
hdc_contour_i = HDC(fit.mul_var_dist, return_period, ts, limits, deltas)
c = sort_points_to_form_continous_line(hdc_contour_i.coordinates[0][0],
                                       hdc_contour_i.coordinates[0][1],
                                       do_search_for_optimal_start=True)
hdc_contour_i.c = c
if DO_COMPUTE_CONFIDENCE_INTERVAL:
# Define the structure of the probabilistic model that will be fitted to the
# dataset. We will use the model that is recommended in DNV-RP-C205 (2010) on
# page 38 and that is called the 'conditional modeling approach' (CMA).
dist_description_hs = {'name': 'Weibull_3p',
                       'dependency': (None, None, None),
                       'width_of_intervals': 0.5}
dist_description_tz = {'name': 'Lognormal_SigmaMu',
                       'dependency': (0, None, 0),  # Shape, Location, Scale
                       'functions': ('exp3', None, 'power3')}  # Shape, Location, Scale

# Fit the hs-tz model to the data.
fit = Fit((a_hs, a_tz), (dist_description_hs, dist_description_tz))
dist0 = fit.mul_var_dist.distributions[0]

# Compute an IFORM contour with a return period of 20 years.
return_period_20 = 20
iform_contour_20 = IFormContour(fit.mul_var_dist, return_period_20, 1, 100)
contour_hs_20 = iform_contour_20.coordinates[0][0]
contour_tz_20 = iform_contour_20.coordinates[0][1]

# Read dataset D.
DATASET_CHAR = 'D'
file_path = 'datasets/' + DATASET_CHAR + '.txt'
d_v, d_hs, label_v, label_hs = read_dataset(file_path)

# Define the structure of the probabilistic model that will be fitted to the
# dataset. We will use the model that is recommended in DNV-RP-C205 (2010) on
                       'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                       'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                       'min_datapoints_for_fit': 50,
                       'do_use_weights_for_dependence_function': True}
dist_description_t = {'name': 'Lognormal_SigmaMu',
                      'dependency': (1, None, 1),  # Shape, Location, Scale
                      'functions': ('asymdecrease3', None, 'lnsquare2'),  # Shape, Location, Scale
                      'min_datapoints_for_fit': 50}

# Fit the model to the data.
fit = Fit((v, hs, tp),
          (dist_description_v, dist_description_hs, dist_description_t))
joint_dist = fit.mul_var_dist
dist_v = joint_dist.distributions[0]

fig1 = plt.figure(figsize=(12.5, 4), dpi=150)
ax1 = fig1.add_subplot(131)
ax2 = fig1.add_subplot(132)
ax3 = fig1.add_subplot(133)
plot_marginal_fit(v, dist_v, fig=fig1, ax=ax1, label='$v$ (m s$^{-1}$)',
                  dataset_char='D')
plot_dependence_functions(fit=fit, fig=fig1,
def test_plot_contour_and_sample(self):
    """
    Plots a contour together with the dataset that has been used to fit a
    distribution for the contour.
    """
    sample_hs, sample_tz, label_hs, label_tz = read_ecbenchmark_dataset()

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_hs = {'name': 'Weibull_Exp',
                           'dependency': (None, None, None, None),
                           'width_of_intervals': 0.5}
    dist_description_tz = {'name': 'Lognormal_SigmaMu',
                           'dependency': (0, None, 0),  # Shape, Location, Scale
                           'functions': ('asymdecrease3', None, 'lnsquare2'),  # Shape, Location, Scale
                           'min_datapoints_for_fit': 50}

    # Fit the model to the data.
    fit = Fit((sample_hs, sample_tz),
              (dist_description_hs, dist_description_tz))
    contour = IFormContour(fit.mul_var_dist, 20, 1, 50)
    contour_hs_20 = contour.coordinates[0][0]
    contour_tz_20 = contour.coordinates[0][1]

    # Find datapoints that exceed the 20-yr contour.
    hs_outside, tz_outside, hs_inside, tz_inside = \
        points_outside(contour_hs_20, contour_tz_20,
                       np.asarray(sample_hs), np.asarray(sample_tz))

    # Compute the median tz conditional on hs.
    hs = np.linspace(0, 14, 100)
    d1 = fit.mul_var_dist.distributions[1]
    c1 = d1.scale.a
    c2 = d1.scale.b
    tz = c1 + c2 * np.sqrt(np.divide(hs, 9.81))

    fig = plt.figure(figsize=(5, 5), dpi=150)
    ax = fig.add_subplot(111)

    # Plot the 20-year contour and the sample.
    plotted_sample = SamplePlotData(x=np.asarray(sample_tz),
                                    y=np.asarray(sample_hs),
                                    ax=ax,
                                    x_inside=tz_inside,
                                    y_inside=hs_inside,
                                    x_outside=tz_outside,
                                    y_outside=hs_outside,
                                    return_period=20)
    plot_contour(x=contour_tz_20,
                 y=contour_hs_20,
                 ax=ax,
                 contour_label='20-yr IFORM contour',
                 x_label=label_tz,
                 y_label=label_hs,
                 line_style='b-',
                 plotted_sample=plotted_sample,
                 x_lim=(0, 19),
                 upper_ylim=15,
                 median_x=tz,
                 median_y=hs,
                 median_label='median of $T_z | H_s$')
    plot_wave_breaking_limit(ax)
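# Why the median is computed as c1 + c2 * sqrt(hs / 9.81) above (assumptions
# about the viroconcom conventions): the median of a lognormal distribution
# is exp(mu), and for 'Lognormal_SigmaMu' the 'lnsquare2' function fits
# mu(hs) = ln(a + b * sqrt(hs / 9.81)), so the median of tz given hs is
# exp(mu(hs)) = a + b * sqrt(hs / 9.81).
def median_tz_given_hs(hs, a, b):
    # Median of the conditional lognormal distribution under the assumed
    # lnsquare2 convention; a and b correspond to d1.scale.a and d1.scale.b.
    return a + b * np.sqrt(hs / 9.81)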
# Define the structure of the probabilistic model that will be fitted to the
# dataset. We will use the model that is recommended in DNV-RP-C205 (2010) on
# page 38 and that is called the 'conditional modeling approach' (CMA).
dist_description_hs = {
    "name": "Weibull_3p",
    "dependency": (None, None, None),
    "width_of_intervals": 0.5,
}
dist_description_v = {
    "name": "Weibull_2p",
    "dependency": (0, None, 0),  # Shape, Location, Scale
    "functions": ("power3", None, "power3"),  # Shape, Location, Scale
}

# Fit the model to the data.
fit = Fit((sample_hs, sample_v), (dist_description_hs, dist_description_v))
mul_var_dist = fit.mul_var_dist

ref_f = mul_var_dist.pdf(x.T)
ref_f_weibull3 = mul_var_dist.distributions[0].pdf(x[:, 0])
ref_weibull3 = mul_var_dist.distributions[0]
ref_weibull3_params = (
    ref_weibull3.shape(None),
    ref_weibull3.loc(None),
    ref_weibull3.scale(None),
)
ref_weibull2 = mul_var_dist.distributions[1]
plt.show()

# Describe the distribution that should be fitted to the sample.
dist_description_0 = {'name': 'Weibull',
                      'dependency': (None, None, None),
                      'width_of_intervals': 2}
dist_description_1 = {'name': 'Lognormal',
                      'dependency': (None, None, 0),
                      'functions': (None, None, 'exp3')}

# Compute the fit.
my_fit = Fit((sample_1, sample_2), (dist_description_0, dist_description_1))

# Plot the fit for the significant wave height, Hs.
# For panel A: use a histogram.
fig = plt.figure(figsize=(9, 4.5))
ax_1 = fig.add_subplot(121)
param_grid = my_fit.multiple_fit_inspection_data[0].scale_at
plt.hist(my_fit.multiple_fit_inspection_data[0].scale_samples[0], density=1,
         label='sample')
shape = my_fit.mul_var_dist.distributions[0].shape(0)
scale = my_fit.mul_var_dist.distributions[0].scale(0)
plt.plot(np.linspace(0, 20, 100),
         sts.weibull_min.pdf(np.linspace(0, 20, 100), c=shape, loc=0,
                      'width_of_intervals': 2}
dist_description_hs = {'name': 'Weibull_Exp',
                       'fixed_parameters': (None, None, None, 5),  # shape, location, scale, shape2
                       'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                       'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                       'min_datapoints_for_fit': 50,
                       'do_use_weights_for_dependence_function': True}

# Fit the model to the data.
fit = Fit((v, hs), (dist_description_v, dist_description_hs))
joint_dist = fit.mul_var_dist
print('Done with fitting the 2D joint distribution')

# Show goodness of fit plot.
dist_v = joint_dist.distributions[0]
fig_fit = plt.figure(figsize=(12.5, 4), dpi=150)
ax1 = fig_fit.add_subplot(131)
ax2 = fig_fit.add_subplot(132)
ax3 = fig_fit.add_subplot(133)
plot_marginal_fit(v, dist_v, fig=fig_fit, ax=ax1, label='$v$ (m s$^{-1}$)',
                  dataset_char='D')
def test_wrong_model(self):
    """
    Tests whether errors are raised when incorrect fitting models are
    specified.
    """
    sample_v, sample_hs, label_v, label_hs = read_benchmark_dataset(
        path='tests/testfiles/1year_dataset_D.txt')

    # This structure is incorrect as there is no distribution called
    # 'something'.
    dist_description_v = {'name': 'something',
                          'dependency': (None, None, None, None),
                          'fixed_parameters': (None, None, None, None),  # shape, location, scale, shape2
                          'width_of_intervals': 2}
    with self.assertRaises(ValueError):
        # Fit the model to the data.
        fit = Fit((sample_v, ), (dist_description_v, ))

    # This structure is incorrect as there is no dependence function called
    # 'something'.
    dist_description_v = {'name': 'Weibull_Exp',
                          'dependency': (None, None, None, None),
                          'width_of_intervals': 2}
    dist_description_hs = {'name': 'Weibull_Exp',
                           'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                           'functions': ('something', None, 'alpha3', None),  # shape, location, scale, shape2
                           'min_datapoints_for_fit': 20}
    with self.assertRaises(ValueError):
        # Fit the model to the data.
        fit = Fit((sample_v, sample_hs),
                  (dist_description_v, dist_description_hs))

    # This structure is incorrect as there will be only 1 or 2 intervals
    # that contain 2000 datapoints.
    dist_description_v = {'name': 'Weibull_Exp',
                          'dependency': (None, None, None, None),
                          'width_of_intervals': 2}
    dist_description_hs = {'name': 'Weibull_Exp',
                           'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                           'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                           'min_datapoints_for_fit': 2000}
    with self.assertRaises(RuntimeError):
        # Fit the model to the data.
        fit = Fit((sample_v, sample_hs),
                  (dist_description_v, dist_description_hs))

    # This structure is incorrect as alpha3 is only compatible with
    # logistics4.
    dist_description_v = {'name': 'Weibull_Exp',
                          'dependency': (None, None, None, None),
                          'width_of_intervals': 2}
    dist_description_hs = {'name': 'Weibull_Exp',
                           'fixed_parameters': (None, None, None, 5),  # shape, location, scale, shape2
                           'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                           'functions': ('power3', None, 'alpha3', None),  # shape, location, scale, shape2
                           'min_datapoints_for_fit': 20}
    with self.assertRaises(TypeError):
        # Fit the model to the data.
        fit = Fit((sample_v, sample_hs),
                  (dist_description_v, dist_description_hs))

    # This structure is incorrect as only shape2 of an exponentiated Weibull
    # distribution can be fixed at the moment.
    dist_description_v = {'name': 'Lognormal',
                          'dependency': (None, None, None, None),
                          'fixed_parameters': (None, None, 5, None),  # shape, location, scale, shape2
                          'width_of_intervals': 2}
    with self.assertRaises(NotImplementedError):
        # Fit the model to the data.
        fit = Fit((sample_v, ), (dist_description_v, ))

    # This structure is incorrect as only shape2 of an exponentiated Weibull
    # distribution can be fixed at the moment.
    dist_description_v = {'name': 'Weibull_Exp',
                          'dependency': (None, None, None, None),
                          'width_of_intervals': 2}
    dist_description_hs = {'name': 'Weibull_Exp',
                           'fixed_parameters': (None, None, 5, None),  # shape, location, scale, shape2
                           'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                           'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                           'min_datapoints_for_fit': 20}
    with self.assertRaises(NotImplementedError):
        # Fit the model to the data.
        fit = Fit((sample_v, sample_hs),
                  (dist_description_v, dist_description_hs))
dist_description_v = {'name': 'Weibull_Exp',
                      'dependency': (None, None, None, None),
                      'width_of_intervals': 2}
dist_description_hs = {'name': 'Weibull_Exp',
                       'fixed_parameters': (None, None, None, 5),  # shape, location, scale, shape2
                       'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                       'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                       'min_datapoints_for_fit': 50,
                       'do_use_weights_for_dependence_function': True}

# Fit the model to the data.
fit = Fit((sample_v, sample_hs), (dist_description_v, dist_description_hs))
dist0 = fit.mul_var_dist.distributions[0]

fig = plt.figure(figsize=(12.5, 3.5), dpi=150)
ax1 = fig.add_subplot(131)
ax2 = fig.add_subplot(132)
ax3 = fig.add_subplot(133)
plot_marginal_fit(sample_v, dist0, fig=fig, ax=ax1, label='$v$ (m s$^{-1}$)',
                  dataset_char=DATASET_CHAR)
plot_dependence_functions(fit=fit, fig=fig, ax1=ax2, ax2=ax3,
                          unconditonal_variable_label=label_v)
fig.suptitle('Dataset ' + DATASET_CHAR)
fig.subplots_adjust(wspace=0.25, bottom=0.15)

# Compute highest density contours with return periods of 0.01, 1 and 50
# years.
ts = 1  # Sea state duration in hours.
def fit_curves(mfm_item: MeasureFileModel, fit_settings, var_number):
    """
    Interface to fit a probabilistic model to a measurement file with the
    viroconcom package.

    Parameters
    ----------
    mfm_item : MeasureFileModel
        Contains the measured data, which should be evaluated.
    fit_settings : dict
        The settings for how the fit should be performed. Here, the
        distribution which should be fitted to the data is specified.
    var_number : int
        Number of random variables that the probabilistic model should have.

    Returns
    -------
    fit : Fit
        The fit contains the probabilistic model, which was fitted to the
        measurement data, as well as data describing how well the fit worked.
    """
    data_path = mfm_item.measure_file.url
    if data_path[0] == '/':
        data_path = data_path[1:]
    data = pd.read_csv(data_path, sep=';',
                       header=NR_LINES_HEADER - 1).to_numpy()
    dists = []
    dates = []
    for i in range(0, var_number):
        dates.append(data[:, i].tolist())
        if i == 0:
            dists.append({'name': fit_settings['distribution_%s' % i],
                          'number_of_intervals': None,
                          'width_of_intervals': float(
                              fit_settings['width_of_intervals_%s' % i]),
                          'dependency': [None, None, None]})
        elif i == (var_number - 1):  # Last variable.
            dists.append({'name': fit_settings['distribution_%s' % i],
                          'number_of_intervals': None,
                          'width_of_intervals': None,
                          'dependency': [
                              adjust(fit_settings['shape_dependency_%s' % i][0]),
                              adjust(fit_settings['location_dependency_%s' % i][0]),
                              adjust(fit_settings['scale_dependency_%s' % i][0])],
                          'functions': [
                              adjust(fit_settings['shape_dependency_%s' % i][1:]),
                              adjust(fit_settings['location_dependency_%s' % i][1:]),
                              adjust(fit_settings['scale_dependency_%s' % i][1:])]})
        else:
            dists.append({'name': fit_settings['distribution_%s' % i],
                          'number_of_intervals': None,
                          'width_of_intervals': float(
                              fit_settings['width_of_intervals_%s' % i]),
                          'dependency': [
                              adjust(fit_settings['shape_dependency_%s' % i][0]),
                              adjust(fit_settings['location_dependency_%s' % i][0]),
                              adjust(fit_settings['scale_dependency_%s' % i][0])],
                          'functions': [
                              adjust(fit_settings['shape_dependency_%s' % i][1:]),
                              adjust(fit_settings['location_dependency_%s' % i][1:]),
                              adjust(fit_settings['scale_dependency_%s' % i][1:])]})

        # Delete unused parameters.
        if dists[i].get('name') == 'Lognormal_SigmaMu' and i > 0:
            dists[i].get('dependency')[1] = None
            dists[i].get('functions')[1] = None
        elif dists[i].get('name') == 'Normal' and i > 0:
            dists[i].get('dependency')[0] = None
            dists[i].get('functions')[0] = None

    fit = Fit(dates, dists, timeout=MAX_COMPUTING_TIME)
    return fit
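# Note on the expected encoding (derived from the lookups above; the exact
# string format is an assumption): for each variable i, fit_settings holds
# keys such as 'shape_dependency_%s' % i, where the first character encodes
# the index of the conditioning variable and the remaining characters name
# the dependence function; both parts are converted with adjust().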
# Define the structure of the probabilistic model that will be fitted to the
# dataset.
dist_description_v = {'name': 'Weibull_Exp',
                      'dependency': (None, None, None, None),
                      'width_of_intervals': 2}
dist_description_hs = {'name': 'Weibull_Exp',
                       'fixed_parameters': (None, None, None, 5),  # shape, location, scale, shape2
                       'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                       'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                       'min_datapoints_for_fit': 50,
                       'do_use_weights_for_dependence_function': True}

# Fit the model to the dataset.
fit = Fit((dataset_d_v, dataset_d_hs),
          (dist_description_v, dist_description_hs))
dist0 = fit.mul_var_dist.distributions[0]
dist1 = fit.mul_var_dist.distributions[1]

# Compute the 50-yr contour.
return_period = 50
ts = 1  # Sea state duration in hours.
limits = [(0, 45), (0, 25)]  # Limits of the computational domain.
deltas = [GRID_CELL_SIZE, GRID_CELL_SIZE]  # Dimensions of the grid cells.
hdc = HighestDensityContour(fit.mul_var_dist, return_period, ts, limits,
                            deltas)
contour_with_all_data = sort_points_to_form_continous_line(
    hdc.coordinates[0], hdc.coordinates[1],
    do_search_for_optimal_start=True)

# Create the figure for plotting the contours.
fig, axs = plt.subplots(len(NR_OF_YEARS_TO_DRAW), 2, sharex=True, sharey=True,
# Define the structure of the probabilistic model that will be fitted to the
# dataset. We will use the model that is recommended in DNV-RP-C205 (2010) on
# page 38 and that is called the 'conditional modeling approach' (CMA).
dist_description_hs = {'name': 'Weibull_3p',
                       'dependency': (None, None, None),
                       'width_of_intervals': 0.5}
dist_description_tz = {'name': 'Lognormal_SigmaMu',
                       'dependency': (0, None, 0),  # Shape, Location, Scale
                       'functions': ('exp3', None, 'power3')}  # Shape, Location, Scale

# Fit the model to the data.
fit = Fit((sample_hs, sample_tz), (dist_description_hs, dist_description_tz))
dist0 = fit.mul_var_dist.distributions[0]
print('First variable: ' + dist0.name +
      ' with scale: ' + str(dist0.scale) + ',' +
      ' shape: ' + str(dist0.shape) + ',' +
      ' location: ' + str(dist0.loc))
print('Second variable: ' + str(fit.mul_var_dist.distributions[1]))

fig = plt.figure(figsize=(10, 5), dpi=150)
plot_marginal_fit(sample_hs, dist0, fig=fig,
                  label='Significant wave height (m)')
fig.suptitle('Dataset ' + DATASET_CHAR)

fig = plt.figure(figsize=(6, 5), dpi=150)
plot_dependence_functions(fit=fit,
delete2 = np.where(data2 == 100.)
data1 = np.delete(data1, delete2)
data2 = np.delete(data2, delete2)
data1 = data1.round(decimals=6)
data2 = data2.round(decimals=6)

# Describe the distribution that should be fitted to the sample.
dist_description_0 = {'name': 'Weibull',
                      'dependency': (None, None, None),
                      'width_of_intervals': 2}
dist_description_1 = {'name': 'Lognormal',
                      'dependency': (0, None, 0),
                      'functions': ('exp3', None, 'power3')}
my_fit = Fit([data1, data2], [dist_description_0, dist_description_1])

dsc = DirectSamplingContour(my_fit.mul_var_dist, 5000000, 25, 24, 6)
direct_sampling_contour = dsc.direct_sampling_contour()

# Plot the contour and the sample.
fig, axes = plt.subplots(2)
axes[0].scatter(dsc.data[0], dsc.data[1], marker='.')
axes[0].plot(direct_sampling_contour[0], direct_sampling_contour[1],
             color='red')
axes[0].title.set_text('Monte-Carlo-Sample')
axes[0].set_ylabel('Mean wave period (s)')
axes[1].scatter(data1, data2)
axes[1].plot(direct_sampling_contour[0], direct_sampling_contour[1],
             color='red')
axes[1].title.set_text('Data from ECMWF')
axes[1].set_xlabel('Significant wave height (m)')