def corrected_day_average(corrected: pd.DataFrame, simulated: pd.DataFrame, observed: pd.DataFrame,
                          merged_sim_obs: pd.DataFrame = False, merged_cor_obs: pd.DataFrame = False,
                          titles: dict = None, outformat: str = 'plotly') -> go.Figure or str:
    """
    Calculates and plots the daily average streamflow.

    This function uses hydrostats.data.merge_data on the 3 inputs. If you have already computed the merged
    dataframes because you are doing a full comparison of bias correction, provide BOTH of them and they are
    used as-is instead of being recomputed.

    Args:
        corrected: the response from the geoglows.bias.correct_historical_simulation function
        simulated: the csv response from historic_simulation
        observed: the dataframe of observed data. Must have a datetime index and a single column of flow values
        merged_sim_obs: (optional) if you have already computed it, hydrostats.data.merge_data(simulated, observed)
        merged_cor_obs: (optional) if you have already computed it, hydrostats.data.merge_data(corrected, observed)
        titles: (dict) Extra info to show on the title of the plot. For example:
            {'Reach ID': 1234567, 'Drainage Area': '1000km^2'}
        outformat: either 'plotly' or 'plotly_html' (default plotly)

    Returns:
        plotly.GraphObject: plotly object, especially for use with python notebooks and the .show() method
        (for outformat 'plotly_html', the value returned by offline_plot with output_type='div')

    Raises:
        ValueError: if outformat is neither 'plotly' nor 'plotly_html'
    """
    # Previously the pre-merged frames were only honoured when corrected/simulated/observed were all False,
    # so callers passing everything paid for a second merge. Merge only when a merged frame is missing.
    merged_missing = any(df is False or df is None for df in (merged_sim_obs, merged_cor_obs))
    if merged_missing:
        merged_sim_obs = hd.merge_data(sim_df=simulated, obs_df=observed)
        merged_cor_obs = hd.merge_data(sim_df=corrected, obs_df=observed)

    daily_avg = hd.daily_average(merged_sim_obs)
    daily_avg2 = hd.daily_average(merged_cor_obs)

    # Column 0 of each averaged frame is plotted as the simulated/corrected series, column 1 as the observed one
    scatters = [
        go.Scatter(x=daily_avg.index, y=daily_avg.iloc[:, 1].values, name='Observed Data'),
        go.Scatter(x=daily_avg.index, y=daily_avg.iloc[:, 0].values, name='Simulated Data'),
        go.Scatter(x=daily_avg2.index, y=daily_avg2.iloc[:, 0].values, name='Corrected Simulated Data'),
    ]

    layout = go.Layout(
        title=_build_title('Daily Average Streamflow Comparison', titles),
        xaxis=dict(title='Days'),
        yaxis=dict(title='Discharge (m<sup>3</sup>/s)', autorange=True),
        showlegend=True)

    if outformat == 'plotly':
        return go.Figure(data=scatters, layout=layout)
    elif outformat == 'plotly_html':
        return offline_plot(
            go.Figure(data=scatters, layout=layout),
            config={'autosizable': True, 'responsive': True},
            output_type='div',
            include_plotlyjs=False
        )
    raise ValueError('Invalid outformat chosen. Choose plotly or plotly_html')
def get_monthlyAverages(request):
    """
    Render the monthly average streamflow chart for the currently loaded station.

    Merges the module-level simulated and bias-corrected dataframes with the observed
    dataframe, computes monthly averages with hydrostats and renders the resulting plotly
    figure through the gizmo template. Any exception is printed and turned into a JSON
    error response.
    """
    global nomRiver, nomEstacion, simulated_df, observed_df, corrected_df

    get_data = request.GET  # read as in the original; not used below

    try:
        # Merge Data
        sim_vs_obs = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        cor_vs_obs = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        # Plotting Data
        original_means = hd.monthly_average(sim_vs_obs)
        corrected_means = hd.monthly_average(cor_vs_obs)

        # column 1 is drawn as the observed series, column 0 as the simulated/corrected one
        traces = [
            go.Scatter(x=original_means.index, y=original_means.iloc[:, 1].values, name='Observed', ),
            go.Scatter(x=original_means.index, y=original_means.iloc[:, 0].values, name='Simulated', ),
            go.Scatter(x=corrected_means.index, y=corrected_means.iloc[:, 0].values,
                       name='Corrected Simulated', ),
        ]

        chart_title = 'Monthly Average Streamflow for River {0} at {1}'.format(nomRiver, nomEstacion)
        figure_layout = go.Layout(
            title=chart_title,
            xaxis=dict(title='Months', ),
            yaxis=dict(title='Discharge (m<sup>3</sup>/s)', autorange=True),
            showlegend=True)

        chart_obj = PlotlyView(go.Figure(data=traces, layout=figure_layout))

        return render(request, 'historical_validation_tool_somalia/gizmo_ajax.html',
                      {'gizmo_object': chart_obj})

    except Exception as e:
        print(str(e))
        return JsonResponse({'error': 'No data found for the selected station.'})
def statistics_tables(corrected: pd.DataFrame, simulated: pd.DataFrame, observed: pd.DataFrame) -> pd.DataFrame:
    """
    Build a side-by-side table of error metrics for the original and the corrected simulation.

    Args:
        corrected: bias-corrected simulation, merged against ``observed`` as the simulated series
        simulated: original simulation, merged against ``observed`` as the simulated series
        observed: observed data used as the reference in both merges

    Returns:
        A dataframe indexed by metric with one column 'Original Full Time Series' and one
        column 'Corrected Full Time Series'.
    """
    # pair each simulation with the observations before computing any statistics
    sim_vs_obs = hd.merge_data(sim_df=simulated, obs_df=observed)
    cor_vs_obs = hd.merge_data(sim_df=corrected, obs_df=observed)

    metric_names = ['ME', 'RMSE', 'NRMSE (Mean)', 'MAPE', 'NSE', 'KGE (2009)', 'KGE (2012)']

    original_stats = hs.make_table(merged_dataframe=sim_vs_obs, metrics=metric_names)
    corrected_stats = hs.make_table(merged_dataframe=cor_vs_obs, metrics=metric_names)

    # relabel the single 'Full Time Series' row so the two tables stay distinguishable after the join
    corrected_stats = corrected_stats.rename(index={'Full Time Series': 'Corrected Full Time Series'})
    original_stats = original_stats.rename(index={'Full Time Series': 'Original Full Time Series'})

    # metrics become the index, then both tables are joined on it
    return original_stats.transpose().merge(corrected_stats.transpose(), left_index=True, right_index=True)
def statistics_tables(corrected: pd.DataFrame, simulated: pd.DataFrame, observed: pd.DataFrame,
                      merged_sim_obs: pd.DataFrame = False, merged_cor_obs: pd.DataFrame = False,
                      metrics: list = None) -> str:
    """
    Makes an html table of various statistical metrics for corrected vs observed data alongside the same metrics for
    the simulated vs observed data as a way to see the improvement made by the bias correction.

    This function uses hydrostats.data.merge_data on the 3 inputs. If you have already computed the merged
    dataframes because you are doing a full comparison of bias correction, provide BOTH of them and they are
    used as-is instead of being recomputed.

    Args:
        corrected: A dataframe with a datetime index and a single column of streamflow values
        simulated: A dataframe with a datetime index and a single column of streamflow values
        observed: A dataframe with a datetime index and a single column of streamflow values
        merged_sim_obs: (optional) if you have already computed it, hydrostats.data.merge_data(simulated, observed)
        merged_cor_obs: (optional) if you have already computed it, hydrostats.data.merge_data(corrected, observed)
        metrics: A list of abbreviated statistic names. See the documentation for HydroErr. Defaults to
            ME, RMSE, NRMSE (Mean), MAPE, NSE, KGE (2009) and KGE (2012).

    Returns:
        str: the html rendering (DataFrame.to_html) of the table, one row per metric and one column each for
        the 'Original Full Time Series' and the 'Corrected Full Time Series'
    """
    # Previously the pre-merged frames were only honoured when corrected/simulated/observed were all False,
    # so callers passing everything paid for a second merge. Merge only when a merged frame is missing.
    merged_missing = any(df is False or df is None for df in (merged_sim_obs, merged_cor_obs))
    if merged_missing:
        merged_sim_obs = hd.merge_data(sim_df=simulated, obs_df=observed)
        merged_cor_obs = hd.merge_data(sim_df=corrected, obs_df=observed)

    if metrics is None:
        metrics = ['ME', 'RMSE', 'NRMSE (Mean)', 'MAPE', 'NSE', 'KGE (2009)', 'KGE (2012)']

    table1 = hs.make_table(merged_dataframe=merged_sim_obs, metrics=metrics)
    table2 = hs.make_table(merged_dataframe=merged_cor_obs, metrics=metrics)

    # relabel the 'Full Time Series' row of each table so the columns are distinguishable after the join
    table1 = table1.rename(index={'Full Time Series': 'Original Full Time Series'})
    table2 = table2.rename(index={'Full Time Series': 'Corrected Full Time Series'})

    # transpose so the metrics form the index, then join the two tables on it
    table_final = pd.merge(table1.transpose(), table2.transpose(), right_index=True, left_index=True)

    return table_final.to_html()
def setUp(self):
    """Merge the two sample CSV datasets into ``self.merged_df`` with columns 'SFPT' and 'GLOFAS'."""
    # NOTE: this changes a process-wide pandas display option, not just this test's state
    pd.options.display.max_columns = 100

    # Defining the URLs of the datasets (the stray trailing space inside each URL literal was removed)
    sfpt_url = r'https://github.com/waderoberts123/Hydrostats/raw/master/Sample_data/sfpt_data/magdalena' \
               r'-calamar_interim_data.csv'
    glofas_url = r'https://github.com/waderoberts123/Hydrostats/raw/master/Sample_data/GLOFAS_Data/magdalena' \
                 r'-calamar_ECMWF_data.csv'

    # Merging the data
    self.merged_df = hd.merge_data(sfpt_url, glofas_url, column_names=('SFPT', 'GLOFAS'))
def volume_table_ajax(request):
    """
    Calculates the volumes of the simulated, observed and corrected streamflow.

    Merges the module-level simulated and corrected dataframes with the observed dataframe, integrates each
    series with Simpson's rule and returns the three totals as JSON under the keys 'sim_volume',
    'obs_volume' and 'corr_volume'. Any exception is printed and turned into a JSON error response.
    """
    global simulated_df, observed_df, corrected_df

    try:
        # scipy.integrate.simps was deprecated in favour of simpson and removed in SciPy 1.14;
        # prefer the new name and fall back to the old one on installations that predate it.
        simpson = getattr(integrate, 'simpson', None) or integrate.simps

        # Merge Data
        merged_df = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        merged_df2 = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        def _volume(flows):
            # 0.0864 presumably converts daily m^3/s values to millions of m^3 (86400 s / 1e6) - TODO confirm
            return round(simpson(flows) * 0.0864, 3)

        # column 0 holds the simulated/corrected series, column 1 the observed one
        resp = {
            "sim_volume": _volume(merged_df.iloc[:, 0].values),
            "obs_volume": _volume(merged_df.iloc[:, 1].values),
            "corr_volume": _volume(merged_df2.iloc[:, 0].values),
        }

        return JsonResponse(resp)

    except Exception as e:
        print(str(e))
        return JsonResponse({'error': 'No data found for the selected station.'})
def get_scatterPlotLogScale(request):
    """
    Render a log-log scatter plot of simulated vs. observed values for the loaded station.

    Both the original and the bias-corrected simulations (module-level dataframes) are merged
    with the observations and drawn as marker traces together with a 1:1 reference line.
    Any exception is printed and turned into a JSON error response.
    """
    global codEstacion, nomEstacion, simulated_df, observed_df, corrected_df

    get_data = request.GET  # read as in the original; not used below

    try:
        # Merge Data
        sim_vs_obs = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        cor_vs_obs = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        # Plotting Data
        original_points = go.Scatter(
            x=sim_vs_obs.iloc[:, 0].values,
            y=sim_vs_obs.iloc[:, 1].values,
            mode='markers',
            name='original',
            marker=dict(color='#ef553b'))

        corrected_points = go.Scatter(
            x=cor_vs_obs.iloc[:, 0].values,
            y=cor_vs_obs.iloc[:, 1].values,
            mode='markers',
            name='corrected',
            marker=dict(color='#00cc96'))

        # the reference line spans the value range of the ORIGINAL merge only
        lowest = min(min(sim_vs_obs.iloc[:, 1].values), min(sim_vs_obs.iloc[:, 0].values))
        highest = max(max(sim_vs_obs.iloc[:, 1].values), max(sim_vs_obs.iloc[:, 0].values))
        ends = [lowest, highest]

        line_45 = go.Scatter(x=ends, y=[lowest, highest], mode='lines', name='45deg line',
                             line=dict(color='black'))

        figure_layout = go.Layout(
            title="Scatter Plot for {0} - {1} (Log Scale)".format(codEstacion, nomEstacion),
            xaxis=dict(title='Simulated', type='log', ),
            yaxis=dict(title='Observed', type='log', autorange=True),
            showlegend=True)

        chart_obj = PlotlyView(
            go.Figure(data=[original_points, corrected_points, line_45], layout=figure_layout))

        return render(
            request,
            'historical_validation_tool_dominican_republic/gizmo_ajax.html',
            {'gizmo_object': chart_obj})

    except Exception as e:
        print(str(e))
        return JsonResponse({'error': 'No data found for the selected station.'})
#plt.figure(2)
#plt.figure(figsize=(15, 9))
#plt.plot(eraI_df.index, eraI_df.iloc[:, 0].values, 'k', color='blue', label='ERA-Interim Streamflow')
#plt.title('ERA-Interim Hydrograph for COMID: ' + str(comid))
#plt.xlabel('Date')
#plt.ylabel('Streamflow (m$^3$/s)')
#plt.legend()
#plt.grid()
#plt.xlim(eraI_df.index[0], eraI_df.index[len(eraI_df.index) - 1])
#t = pd.date_range(eraI_df.index[0], eraI_df.index[len(eraI_df.index) - 1], periods=10).to_pydatetime()
#plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
#plt.tight_layout()
#plt.savefig(plot_sim_hyd_dir + '/ERA-Interim Hydrograph for ' + str(comid) + '.png')

# Merging the Data: ERA-Interim is passed as the simulated series, ERA-5 as the observed one
merged_df = hd.merge_data(sim_df=eraI_df, obs_df=era5_df)

#'''Tables and Plots'''

# Appending the table to the final table
table = hs.make_table(merged_df,
                      metrics=[
                          'ME', 'MAE', 'MAPE', 'RMSE', 'NRMSE (Mean)', 'NSE',
                          'KGE (2009)', 'KGE (2012)', 'R (Pearson)',
                          'R (Spearman)', 'r2'
                      ],
                      location=point,
                      remove_neg=False,
                      remove_zero=False)
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the same way
all_station_table = pd.concat([all_station_table, table])

# Making plots for all the stations
def corrected_volume_compare(corrected: pd.DataFrame, simulated: pd.DataFrame, observed: pd.DataFrame,
                             merged_sim_obs: pd.DataFrame = False, merged_cor_obs: pd.DataFrame = False,
                             titles: dict = None, outformat: str = 'plotly') -> go.Figure or str:
    """
    Calculates and plots the cumulative volume output on each of the 3 datasets provided.

    This function uses hydrostats.data.merge_data on the 3 inputs. If you have already computed the merged
    dataframes because you are doing a full comparison of bias correction, provide BOTH of them and they are
    used as-is instead of being recomputed.

    Args:
        corrected: the response from the geoglows.bias.correct_historical_simulation function
        simulated: the csv response from historic_simulation
        observed: the dataframe of observed data. Must have a datetime index and a single column of flow values
        merged_sim_obs: (optional) if you have already computed it, hydrostats.data.merge_data(simulated, observed)
        merged_cor_obs: (optional) if you have already computed it, hydrostats.data.merge_data(corrected, observed)
        titles: (dict) Extra info to show on the title of the plot. For example:
            {'Reach ID': 1234567, 'Drainage Area': '1000km^2'}
        outformat: either 'plotly' or 'plotly_html' (default plotly)

    Returns:
        plotly.GraphObject: plotly object, especially for use with python notebooks and the .show() method
        (for outformat 'plotly_html', the value returned by offline_plot with output_type='div')

    Raises:
        ValueError: if outformat is neither 'plotly' nor 'plotly_html'
    """
    # Previously the pre-merged frames were only honoured when corrected/simulated/observed were all False,
    # so callers passing everything paid for a second merge. Merge only when a merged frame is missing.
    merged_missing = any(df is False or df is None for df in (merged_sim_obs, merged_cor_obs))
    if merged_missing:
        merged_sim_obs = hd.merge_data(sim_df=simulated, obs_df=observed)
        merged_cor_obs = hd.merge_data(sim_df=corrected, obs_df=observed)

    # 0.0864 = 86400 s/day / 1e6: assuming daily flows in m^3/s, each value becomes a daily volume in
    # millions of m^3 (TODO confirm the timestep). ndarray.cumsum() accumulates sequentially, exactly like
    # the running-total loops it replaces.
    sim_volume_cum = (merged_sim_obs.iloc[:, 0].values * 0.0864).cumsum()
    obs_volume_cum = (merged_sim_obs.iloc[:, 1].values * 0.0864).cumsum()
    corr_volume_cum = (merged_cor_obs.iloc[:, 0].values * 0.0864).cumsum()

    observed_volume = go.Scatter(x=merged_sim_obs.index, y=obs_volume_cum, name='Observed', )
    simulated_volume = go.Scatter(x=merged_sim_obs.index, y=sim_volume_cum, name='Simulated', )
    corrected_volume = go.Scatter(x=merged_cor_obs.index, y=corr_volume_cum, name='Corrected Simulated', )
    scatters = [observed_volume, simulated_volume, corrected_volume]

    layout = go.Layout(
        title=_build_title('Cumulative Volume Comparison', titles),
        xaxis=dict(title='Datetime', ),
        # the 0.0864 factor yields millions of cubic metres, so the axis is labelled Mm^3 (was m^3)
        yaxis=dict(title='Volume (Mm<sup>3</sup>)', autorange=True),
        showlegend=True)

    if outformat == 'plotly':
        return go.Figure(data=scatters, layout=layout)
    elif outformat == 'plotly_html':
        return offline_plot(
            go.Figure(data=scatters, layout=layout),
            config={'autosizable': True, 'responsive': True},
            output_type='div',
            include_plotlyjs=False
        )
    raise ValueError('Invalid outformat chosen. Choose plotly or plotly_html')
'ID: Low vs. Med Res', 'ID: Low vs. High Res', 'ID: Med vs. High Res', 'MO: Low vs. Med Res', 'MO: Low vs. High Res', 'MO: Med vs. High Res', 'NY: Low vs. Med Res', 'NY: Low vs. High Res', 'NY: Med vs. High Res', 'OR: Low vs. Med Res', 'OR: Low vs. High Res', 'OR: Med vs. High Res', 'COL: Low vs. Med Res', 'COL: Low vs. High Res', 'COL: Med vs. High Res' ] # seasonal = [['01-01', '03-31'], ['04-01', '06-30'], ['07-01', '09-30'], ['10-01', '12-31']] # Create df for each comparison, append to overall df table = pd.DataFrame() # Can make seasonal_periods=seasonal (see above) for s, o, n in zip(sim_list, obs_list, name_list): merged_df = hd.merge_data(sim_df=s, obs_df=o) temp_table = ha.make_table(merged_df, metrics=my_metrics, seasonal_periods=None, location=n) table = table.append(temp_table) table.to_csv( '/home/chrisedwards/Documents/rapid_output/stat_comparison/Statistical_Summary.csv' ) # print(table) # # # CSV including: Full Time Series # table_full_yr = table[table.index.str.contains('Full')] # table_full_yr.to_csv('/home/chrisedwards/Documents/rapid_output/statistical_comparison/Full_Year_Stats.csv') # # # print(table_full_yr)
# Build the per-station CSV paths of the monthly-corrected ERA-5 and ERA-Interim simulations
ERA5_Files = []
ERAI_Files = []

for comid, name in zip(COMIDs, Names):
    csv_name = str(comid) + '_' + str(name) + '.csv'
    ERA5_Files.append(
        '/Users/student/Dropbox/PhD/2020 Winter/Dissertation_v9/Africa/Blue_Nile/Data/Historical/simulated_data/ERA_5/Monthly_Corrected/'
        + csv_name)
    ERAI_Files.append(
        '/Users/student/Dropbox/PhD/2020 Winter/Dissertation_v9/Africa/Blue_Nile/Data/Historical/simulated_data/ERA_Interim/Monthly_Corrected/'
        + csv_name)

for comid, name, rio, ERA5_File, ERAI_File in zip(COMIDs, Names, Rivers, ERA5_Files, ERAI_Files):

    print(comid, name, rio)

    # Merging the Data: ERA-Interim goes in as the simulated series, ERA-5 as the observed one
    merged_df = hd.merge_data(ERAI_File, ERA5_File)

    # monthly mean: the 'Observed' column carries ERA-5, the 'Simulated' column ERA-Interim
    monthly_avg = hd.monthly_average(merged_df)
    ERA5_monthly_avg = monthly_avg[['Observed']]
    ERA_Interim_monthly_avg = monthly_avg[['Simulated']]

    # monthly standard error, split the same way
    monthly_std_error = hd.monthly_std_error(merged_data=merged_df)
    ERA5_monthly_std_error = monthly_std_error[['Observed']]
    ERA_Interim_monthly_std_error = monthly_std_error[['Simulated']]

    # multiannual monthly statistics of the gauge, indexed by the 'Month' column read as text
    observed_monthly = pd.read_csv(
        '/Users/student/Dropbox/PhD/2020 Winter/Dissertation_v9/Africa/Blue_Nile/Data/Historical/observed_data/Multiannual_Mean_Streamflow/{0}_{1}.csv'.format(comid, name),
        dtype={'Month': str})
    observed_monthly.set_index('Month', inplace=True)
    observed_monthly_avg = observed_monthly[['Mean Streamflow (m3/s)']]
    observed_monthly_std_error = observed_monthly[['Standard Error']]
def setUp(self):
    """Merge the two sample CSVs into ``self.merged_df`` under the given column names."""
    # adjacent raw-string literals are joined by the parser into a single URL
    simulated_source = (r'https://github.com/waderoberts123/Hydrostats/raw/master/Sample_data/sfpt_data/'
                        r'magdalena-calamar_interim_data.csv')
    observed_source = (r'https://github.com/waderoberts123/Hydrostats/raw/master/Sample_data/GLOFAS_Data/'
                       r'magdalena-calamar_ECMWF_data.csv')
    column_labels = ('Streamflow Prediction Tool', 'GLOFAS')

    self.merged_df = hd.merge_data(simulated_source, observed_source, column_names=column_labels)
'Low Res', 'Med Res', 'High Res', 'Low Res', 'Med Res', 'High Res',
'Low Res', 'Med Res', 'High Res', 'Low Res', 'Med Res', 'High Res',
'Low Res', 'Med Res', 'High Res'
]
s2 = 'Observed'

# This list specifies which metrics to use:
metrics = []

# This list controls the axis labels:
labels = ['Datetime', 'Streamflow (cms)']

# End of Dynamic Input. Do NOT Change the following -------------------------------------------

# One plot per (simulation, observation) pairing; s and o are used as positions into
# list_riv_mouth and list_obs_df, t as the position into list_titles.
for s, o, t, c1, s1 in zip(list_sim, list_obs, range(15), color1, series1):
    # x_df only fills the simulated slot so daily_average can be applied to the observations;
    # its 'Simulated' column is dropped again two lines below.
    temp_merged = hd.merge_data(sim_df=x_df, obs_df=list_obs_df[o])
    temp_davg = hd.daily_average(temp_merged)
    # remove the leap day row and the placeholder column
    obs_loop_df = temp_davg.drop(index='02/29', columns='Simulated')
    sim_temp_df = list_riv_mouth[s]
    # Chained assignment: the "MM/DD" labels of 2001 are written to sim_temp_df.index (mutating the
    # frame held in list_riv_mouth) AND bound to sim_loop_in.
    sim_loop_in = sim_temp_df.index = pd.date_range(
        '2001-01-01', '2001-12-31').strftime("%m/%d")
    # dates=pd.date_range('2001-01-01', '2001-12-31').strftime("%m/%d")
    # NOTE(review): the frame below is built from the date labels themselves, not from the values of
    # sim_temp_df - confirm this is intended.
    sim_loop_df = pd.DataFrame(
        sim_loop_in,
        index=pd.date_range('2001-01-01', '2001-12-31').strftime("%m/%d"))
    group = [sim_loop_df, obs_loop_df]
    # column-wise join on the shared "MM/DD" index
    merged_df = pd.concat(group, axis=1)
    filename = list_titles[t]
    hv.plot(merged_data_df=merged_df,
metrics = [] # This list controls the axis labels: labels=['Datetime', 'Streamflow (cms)'] # End of Dynamic Input. Do NOT Change the following ------------------------------------------- # Create a list of 18 stream modified Time Series list_riv_part = [] for riv in list_riv_mouth: riv_part = riv.loc[begin_date:end_date] list_riv_part.append(riv_part) for s, o, c1, t, s1 in zip(list_sim, list_obs, color1, range(18), series1): merged_df = hd.merge_data(sim_df=list_riv_part[s], obs_df=list_obs_df[o]) filename = year + ': ' + list_titles[t] hv.plot(merged_data_df=merged_df, title=filename, linestyles=[c1, 'k-'], legend=(s1, series2), labels=labels, metrics = metrics, grid=True) plt.savefig('/home/chrisedwards/Documents/rapid_output/graphs/{}.png'.format(filename))
plt.title('Simulated Hydrograph for ' + str(id) + ' - ' + name + '\n River: ' + rio + '. COMID - ' + str(comid))
plt.xlabel('Date')
plt.ylabel('Streamflow (m$^3$/s)')
plt.legend()
plt.grid()
# clamp the x axis to the first and last simulated date
plt.xlim(dates_sim[0], dates_sim[len(dates_sim) - 1])
# ten evenly spaced dates over the same range; not applied as ticks in the visible code
t = pd.date_range(dates_sim[0], dates_sim[len(dates_sim) - 1], periods=10).to_pydatetime()
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.tight_layout()
plt.savefig(plot_sim_hyd_dir + '/Simulated Hydrograph for ' + str(id) + ' - ' + name + '. COMID - ' + str(comid) + '.png')

# Merging the Data
merged_df = hd.merge_data(simFile, obsFile)

# Tables and Plots

# Appending the table to the final table
table = hs.make_table(merged_df,
                      metrics=[
                          'ME', 'MAE', 'MAPE', 'RMSE', 'NRMSE (Mean)', 'NSE',
                          'KGE (2009)', 'KGE (2012)', 'R (Pearson)',
                          'R (Spearman)', 'r2'
                      ],
                      location=id,
                      remove_neg=False,
                      remove_zero=False)
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the same way
all_station_table = pd.concat([all_station_table, table])

# Making plots for all the stations
'AZ: Sim (str-60) vs Obs (gauge-09492400)',
'MO: Sim (str-50) vs Obs (gauge-07013000)',
'MO: Sim (str-51) vs Obs (gauge-07014000)',
'NY: Sim (str-47) vs Obs (gauge-01413408)',
'NY: Sim (str-46) vs Obs (gauge-01413398)',
'OR: Sim (str-58) vs Obs (gauge-14306400)',
'OR: Sim (str-47) vs Obs (gauge-14306100)'
]

# This list specifies which metrics to use:
metrics = []

# This list controls the axis labels:
labels = ['Datetime', 'Streamflow (cms)']

# s, o and t always hold the same position (0..11): the i-th simulated frame is paired with the
# i-th observed frame and the i-th title.
for s, o, t in zip(range(12), range(12), range(12)):
    merged_df = hd.merge_data(sim_df=list_sim_df[s], obs_df=list_obs_df[o])
    # collapse the merged series to daily averages before plotting
    da_df = hd.daily_average(merged_df)
    filename = list_titles[t] + ' Daily Average'
    hv.plot(merged_data_df=da_df,
            title=filename,
            linestyles=['b-', 'k-'],
            legend=('Sim', 'Obs'),
            labels=labels,
            metrics=metrics,
            x_season=True,
            grid=True)
    plt.tight_layout()
    # the plot title doubles as the file name of the saved image
    plt.savefig(
        '/home/chrisedwards/Documents/rapid_output/graphs/{}.png'.format(
            filename))
# This list specifies which metrics to use: metrics = [] # This list controls the axis labels: labels = ['Datetime', 'Streamflow (cms)'] # End of Dynamic Input. Do NOT Change the following ------------------------------------------- # Create a list of 18 stream modified Time Series list_riv_part_era5 = [] for riv in list_riv_mouth_era5: riv_part_era5 = riv.loc[begin_date:end_date] list_riv_part_era5.append(riv_part_era5) for s, o, t, s1, s2 in zip(list_sim, list_obs, range(18), series1, series2): merged_df = hd.merge_data(sim_df=list_riv_part_era5[s], obs_df=list_riv_part_era5[o]) filename = year + ' (ERA-5): ' + list_titles[t] hv.scatter(merged_data_df=merged_df, title=filename, labels=(s1, s2), metrics=metrics, marker_style=".", grid=True, log_scale=False, line45=True) plt.tight_layout() plt.savefig( '/home/chrisedwards/Documents/rapid_output/graphs/{}.png'.format( filename))
def get_scatterPlot(request):
    """
    Render a scatter plot of simulated vs. observed values for the loaded station.

    The original and the bias-corrected simulations (module-level dataframes) are merged with
    the observations and drawn as marker traces, together with a 1:1 reference line and a
    least-squares regression line for each of the two merges. Any exception is printed and
    turned into a JSON error response.
    """
    global codEstacion, nomEstacion, simulated_df, observed_df, corrected_df

    get_data = request.GET  # read as in the original; not used below

    try:
        # Merge Data
        sim_vs_obs = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        cor_vs_obs = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        # Plotting Data
        original_points = go.Scatter(
            x=sim_vs_obs.iloc[:, 0].values,
            y=sim_vs_obs.iloc[:, 1].values,
            mode='markers',
            name='original',
            marker=dict(color='#ef553b'))

        corrected_points = go.Scatter(
            x=cor_vs_obs.iloc[:, 0].values,
            y=cor_vs_obs.iloc[:, 1].values,
            mode='markers',
            name='corrected',
            marker=dict(color='#00cc96'))

        # every line is drawn between the extremes of the ORIGINAL merge only
        lowest = min(min(sim_vs_obs.iloc[:, 1].values), min(sim_vs_obs.iloc[:, 0].values))
        highest = max(max(sim_vs_obs.iloc[:, 1].values), max(sim_vs_obs.iloc[:, 0].values))

        line_45 = go.Scatter(x=[lowest, highest], y=[lowest, highest], mode='lines',
                             name='45deg line', line=dict(color='black'))

        # only slope and intercept of each regression are needed; the other results are discarded
        slope, intercept, *_ = sp.linregress(
            sim_vs_obs.iloc[:, 0].values, sim_vs_obs.iloc[:, 1].values)
        slope2, intercept2, *_ = sp.linregress(
            cor_vs_obs.iloc[:, 0].values, cor_vs_obs.iloc[:, 1].values)

        original_fit = go.Scatter(
            x=[lowest, highest],
            y=[slope * lowest + intercept, slope * highest + intercept],
            mode='lines',
            name='{0}x + {1} (Original)'.format(str(round(slope, 2)), str(round(intercept, 2))),
            line=dict(color='red'))

        corrected_fit = go.Scatter(
            x=[lowest, highest],
            y=[slope2 * lowest + intercept2, slope2 * highest + intercept2],
            mode='lines',
            name='{0}x + {1} (Corrected)'.format(str(round(slope2, 2)), str(round(intercept2, 2))),
            line=dict(color='green'))

        # NOTE(review): watershed, subbasin and comid are not assigned in this function, so they must
        # exist at module level; otherwise the NameError lands in the except branch - confirm.
        figure_layout = go.Layout(
            title='Scatter Plot for {0}-{1} <br> COMID: {2}'.format(watershed, subbasin, comid),
            xaxis=dict(title='Simulated', ),
            yaxis=dict(title='Observed', autorange=True),
            showlegend=True)

        traces = [original_points, corrected_points, line_45, original_fit, corrected_fit]
        chart_obj = PlotlyView(go.Figure(data=traces, layout=figure_layout))

        return render(
            request,
            'historical_validation_tool_west_africa/gizmo_ajax.html',
            {'gizmo_object': chart_obj})

    except Exception as e:
        print(str(e))
        return JsonResponse({'error': 'No data found for the selected station.'})
def get_volumeAnalysis(request):
    """
    Render the cumulative volume chart for the currently loaded station.

    Merges the module-level simulated and bias-corrected dataframes with the observed dataframe,
    scales every value by 0.0864, accumulates each series and renders the three cumulative curves
    through the gizmo template. Any exception is printed and turned into a JSON error response.
    """
    global codEstacion, nomEstacion, simulated_df, observed_df, corrected_df

    try:
        # Merge Data
        merged_df = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        merged_df2 = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        # Plotting Data
        # 0.0864 = 86400 s/day / 1e6: presumably turns daily m^3/s values into millions of m^3 per day,
        # matching the Mm^3 axis label below (TODO confirm the timestep). ndarray.cumsum() accumulates
        # sequentially, exactly like the running-total loops it replaces.
        sim_volume_cum = (merged_df.iloc[:, 0].values * 0.0864).cumsum()
        obs_volume_cum = (merged_df.iloc[:, 1].values * 0.0864).cumsum()
        corr_volume_cum = (merged_df2.iloc[:, 0].values * 0.0864).cumsum()

        observed_volume = go.Scatter(x=merged_df.index, y=obs_volume_cum, name='Observed', )
        simulated_volume = go.Scatter(x=merged_df.index, y=sim_volume_cum, name='Simulated', )
        corrected_volume = go.Scatter(x=merged_df2.index, y=corr_volume_cum, name='Corrected Simulated', )

        layout = go.Layout(
            title='Observed & Simulated Volume at<br> {0} - {1}'.format(codEstacion, nomEstacion),
            xaxis=dict(title='Dates', ),
            yaxis=dict(title='Volume (Mm<sup>3</sup>)', autorange=True),
            showlegend=True)

        chart_obj = PlotlyView(
            go.Figure(data=[observed_volume, simulated_volume, corrected_volume], layout=layout))

        context = {
            'gizmo_object': chart_obj,
        }

        return render(
            request,
            'historical_validation_tool_dominican_republic/gizmo_ajax.html',
            context)

    except Exception as e:
        print(str(e))
        return JsonResponse({'error': 'No data found for the selected station.'})
# Parse the ERA-5 precipitation index as datetimes and give its column a source-specific name
era5_prec.index = pd.to_datetime(era5_prec.index)
era5_prec.rename({'Precipitation (mm)': 'ERA-5 Precipitation (mm)'}, axis=1, inplace=True)

# ERA-Interim precipitation
erai_prec = pd.read_csv('/volumes/files/ECMWF_Precipitation/ERA_Interim/Daily_GeoTIFF_Clipped/{0}.csv'.format(region), index_col=0)
erai_prec.index = pd.to_datetime(erai_prec.index)
erai_prec.rename({'Precipitation (mm)': 'ERA-I Precipitation (mm)'}, axis=1, inplace=True)

# ERA-5 runoff
era5_run = pd.read_csv('/volumes/files/ECMWF_Runoff/ERA_5/Daily_GeoTIFF_Clipped/{0}.csv'.format(region), index_col=0)
era5_run.index = pd.to_datetime(era5_run.index)
era5_run.rename({'Runoff (mm)': 'ERA-5 Runoff (mm)'}, axis=1, inplace=True)

# ERA-Interim runoff
erai_run = pd.read_csv('/volumes/files/ECMWF_Runoff/ERA_Interim/Daily_GeoTIFF_Clipped/{0}.csv'.format(region), index_col=0)
erai_run.index = pd.to_datetime(erai_run.index)
erai_run.rename({'Runoff (mm)': 'ERA-I Runoff (mm)'}, axis=1, inplace=True)

# ERA-Interim is passed as sim_df and ERA-5 as obs_df, so column 0 of the merged frame is
# ERA-Interim and column 1 is ERA-5.
merged_prec = hd.merge_data(sim_df=erai_prec, obs_df=era5_prec)

# NOTE(review): plt.figure(1) opens a figure that stays empty because the next call opens a second
# one; kept so that figure numbering for any later code is unchanged.
plt.figure(1)
plt.figure(figsize=(17.7983738762, 11))

# The legend labels used to be attached to the opposite columns (column 0 was labelled ERA-5).
# Each series now carries the label of the dataset it really holds; colours and legend order are kept.
# The redundant 'k' format string was dropped: the color keyword already overrode it.
plt.plot(merged_prec.index, merged_prec.iloc[:, 1].values, color='red', label='ERA-5')
plt.plot(merged_prec.index, merged_prec.iloc[:, 0].values, color='blue', label='ERA-Interim')
plt.title('ERA-5 and ERA-Interim Precipitation at ' + region)
plt.xlabel('Date')
plt.ylabel('Precipitation (mm)')
plt.legend()
plt.grid()
# clamp the x axis to the first and last merged date
plt.xlim(merged_prec.index[0], merged_prec.index[len(merged_prec.index) - 1])
#t = pd.date_range(merged_prec.index[0], merged_prec.index[len(merged_prec.index) - 1], periods=10).to_pydatetime()
#plt.xticks(t)
#plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.tight_layout()
def _float_param(params, key, default=1):
    """Return ``params[key]`` converted to float, or ``default`` when the key is absent."""
    raw = params.get(key, None)
    return default if raw is None else float(raw)


def _x_bar_param(params, key):
    """Return ``params[key]`` as a float, or None when the key is absent or its value equals 1."""
    raw = params.get(key, None)
    if raw is None:
        return None
    value = float(raw)
    return value if value != 1 else None


def make_table_ajax(request):
    """
    Build the html metrics table comparing the original and the bias-corrected simulation.

    Query parameters read from ``request.GET``:
        metrics[]: list of metric abbreviations handed to hs.make_table
        mase_m, dmod_j, nse_mod_j, h6_k_MHE, h6_k_AHE, h6_k_RMSHE: optional floats, default 1
        lm_x_bar, d1_p_x_bar: optional floats; a missing value or a value of 1 is passed on as None

    Returns:
        HttpResponse with the html table (one row per metric, one column each for the
        'Original Full Time Series' and the 'Corrected Full Time Series'), or a JsonResponse
        carrying an 'error' key when anything raises.
    """
    get_data = request.GET

    global simulated_df, observed_df, corrected_df

    try:
        # Indexing the metrics to get the abbreviations
        selected_metric_abbr = get_data.getlist("metrics[]", None)

        # Retrieving the extra optional parameters, keyed by the keyword names hs.make_table is
        # called with. A missing lm_x_bar / d1_p_x_bar used to raise on float(None) and end in the
        # generic error response; it is now treated like the neutral value 1.
        extra_params = {
            'mase_m': _float_param(get_data, 'mase_m'),
            'dmod_j': _float_param(get_data, 'dmod_j'),
            'nse_mod_j': _float_param(get_data, 'nse_mod_j'),
            'h6_mhe_k': _float_param(get_data, 'h6_k_MHE'),
            'h6_ahe_k': _float_param(get_data, 'h6_k_AHE'),
            'h6_rmshe_k': _float_param(get_data, 'h6_k_RMSHE'),
            'd1_p_obs_bar_p': _x_bar_param(get_data, 'd1_p_x_bar'),
            'lm_x_obs_bar_p': _x_bar_param(get_data, 'lm_x_bar'),
        }

        # Merge Data
        merged_df = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        merged_df2 = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        # Creating the tables based on user input (same metrics and parameters for both merges)
        table = hs.make_table(merged_dataframe=merged_df, metrics=selected_metric_abbr, **extra_params)
        table = table.round(decimals=2)

        table2 = hs.make_table(merged_dataframe=merged_df2, metrics=selected_metric_abbr, **extra_params)
        table2 = table2.round(decimals=2)

        # Relabel the 'Full Time Series' row of each table so the columns are distinguishable
        table = table.rename(index={'Full Time Series': 'Original Full Time Series'})
        table2 = table2.rename(index={'Full Time Series': 'Corrected Full Time Series'})

        # Metrics become the index; the two tables are joined on it. The intermediate html strings
        # the function used to build (one of them from the wrong table) were never returned and are gone.
        table_final = pd.merge(table.transpose(), table2.transpose(), right_index=True, left_index=True)

        table_final_html = table_final.to_html(
            classes="table table-hover table-striped",
            table_id="corrected_1").replace('border="1"', 'border="0"')

        return HttpResponse(table_final_html)

    except Exception:
        traceback.print_exc()
        return JsonResponse({'error': 'No data found for the selected station.'})
index_col=0)
# keep only the remaining flow column of the NY gauge (presumably 'Flow-cms' - confirm against the CSV)
ny_obs_cms = ny_obs_full.drop(columns=["Flow-cfs", "Estimation"])
or_obs_full = pd.read_csv(
    '/home/chrisedwards/Documents/gauge_data/14306500_1-1-1980_12-31-2014.csv',
    index_col=0)
or_obs_cms = or_obs_full.drop(columns=["Flow-cfs", "Estimation"])

list_obs_df = [az_obs_cms, id_obs_cms, mo_obs_cms, ny_obs_cms, or_obs_cms]

# x_df is the OR gauge frame without its 'Flow-cms' and 'Estimation' columns; it only fills the
# simulated slot of merge_data below and its column ('Delete') is discarded afterwards.
x_df = or_obs_full.drop(columns=['Flow-cms', 'Estimation'])

# ------------------------------------------------------------------------------------------------------

merged_df = hd.merge_data(sim_df=x_df,
                          obs_df=or_obs_cms,
                          column_names=['Delete', 'Observed'])

# daily average of the observations, then drop the placeholder column
temp_da = hd.daily_average(merged_df)
avg_obs = temp_da.drop(columns='Delete')

# relabel or_lowres with the "MM/DD" strings of 2001 so it can be aligned with avg_obs
or_lowres.index = pd.date_range("2001-01-01", "2001-12-31").strftime("%m/%d")

group = [or_lowres, avg_obs]
# column-wise join on the "MM/DD" index; the leap day row is removed afterwards
rapid_vs_obs = pd.concat(group, axis=1)
rapid_vs_obs.drop(index='02/29', inplace=True)
# print(rapid_vs_obs)

labels = ['Datetime', 'Streamflow (cms)']
hv.plot(merged_data_df=rapid_vs_obs,
'''Defining the simulated return periods thresholds''' sim_2_threshold = simulated_rp['return_period_2'].loc[ simulated_rp.index == float('{0}.0'.format(comid))].values[0] sim_5_threshold = simulated_rp['return_period_5'].loc[ simulated_rp.index == float('{0}.0'.format(comid))].values[0] sim_10_threshold = simulated_rp['return_period_10'].loc[ simulated_rp.index == float('{0}.0'.format(comid))].values[0] sim_25_threshold = simulated_rp['return_period_25'].loc[ simulated_rp.index == float('{0}.0'.format(comid))].values[0] sim_50_threshold = simulated_rp['return_period_50'].loc[ simulated_rp.index == float('{0}.0'.format(comid))].values[0] sim_100_threshold = simulated_rp['return_period_100'].loc[ simulated_rp.index == float('{0}.0'.format(comid))].values[0] merged_df = hd.merge_data(sim_df=simulated_df, obs_df=historical_df) historical_df = merged_df.iloc[:, 1].to_frame() simulated_df = merged_df.iloc[:, 0].to_frame() df1 = historical_df.loc[(historical_df['Observed'] < obs_2_threshold)] df2 = historical_df.loc[(historical_df['Observed'] >= obs_2_threshold)] df3 = historical_df.loc[(historical_df['Observed'] >= obs_5_threshold)] df4 = historical_df.loc[(historical_df['Observed'] >= obs_10_threshold)] df5 = historical_df.loc[(historical_df['Observed'] >= obs_25_threshold)] df6 = historical_df.loc[(historical_df['Observed'] >= obs_50_threshold)] df7 = historical_df.loc[(historical_df['Observed'] >= obs_100_threshold)] event_return_period = 0 if len(df2.index) > 0:
list_avg_condensed.sort() # Now there is a dictionary called 'seas_avg_dict' that has the seasonal averages stored in a pandas DataFrame. # Each array has the datetime and flowrate. # Each data frame is named in the format '{state}-{streamID}' (eg: 'az-7' or 'col-9'). # There are a total of 180 streams, or 180 keys in the dictionary: seas_avg_dict['az-7'] # list_streams_condensed = list of all the stream names, or names of the data frames. # *************************************************************************************************************** # *************************************************************************************************************** az_9 = streamflow_dict['mo-7'] az_avg_9 = seas_avg_dict['mo-avg-7'] merged_df = hd.merge_data(sim_df=az_9, obs_df=streamflow_dict['az-21'], column_names=['9-calc', '21-calc']) dailyavg2 = hd.daily_average(merged_df) avg_calc = dailyavg2.drop(columns='21-calc') az_avg_9.index = pd.date_range("2001-01-01", "2001-12-31").strftime("%m/%d") group = [avg_calc, az_avg_9] calc_vs_ncdf = pd.concat(group, axis=1) calc_vs_ncdf.drop(index='02/29', inplace=True) labels = ['Datetime', 'Streamflow (cms)'] hv.plot(merged_data_df=calc_vs_ncdf, title="MO Daily Avg: Hydrostats vs Rapid ", linestyles=['r-', 'k-'], legend=('Hydrostats', 'Rapid NetCDF'),
'. COMID - ' + str(comid) + '.png') obsData = pd.DataFrame({ 'datetime': dates_obs, 'observed volume (BCM)': obs_df.iloc[:, 0].values }) obsData.set_index(['datetime'], inplace=True) simData = pd.DataFrame({ 'datetime': dates_sim, 'simulated volume (BCM)': sim_df.iloc[:, 0].values }) simData.set_index(['datetime'], inplace=True) #Merging the Data merged_df = hd.merge_data(sim_df=simData, obs_df=obsData, column_names=('Simulated', 'Observed')) '''Tables and Plots''' # Appending the table to the final table table = hs.make_table(merged_df, metrics=[ 'ME', 'MAE', 'MAPE', 'RMSE', 'NRMSE (Mean)', 'NSE', 'KGE (2009)', 'KGE (2012)', 'R (Pearson)', 'R (Spearman)', 'r2' ], location=name, remove_neg=False, remove_zero=False) all_station_table = all_station_table.append(table) #Making plots for all the stations
def corrected_scatterplots(corrected: pd.DataFrame, simulated: pd.DataFrame, observed: pd.DataFrame,
                           merged_sim_obs: pd.DataFrame = False, merged_cor_obs: pd.DataFrame = False,
                           titles: dict = None, outformat: str = 'plotly') -> go.Figure or str:
    """
    Creates a scatter plot of simulated vs. observed discharge, before and after bias correction.

    The figure contains the original and corrected point clouds, a 45 degree reference line and a linear
    regression line for each point cloud. This function uses hydrostats.data.merge_data on the 3 inputs. If you
    have already computed the merged dataframes because you are doing a full comparison of bias correction, you
    can provide them to save time; any merged dataframe that is provided is used as-is and not recomputed.

    Args:
        corrected: the response from the geoglows.bias.correct_historical_simulation function
        simulated: the csv response from historic_simulation
        observed: the dataframe of observed data. Must have a datetime index and a single column of flow values
        merged_sim_obs: (optional) if you have already computed it, hydrostats.data.merge_data(simulated, observed)
        merged_cor_obs: (optional) if you have already computed it, hydrostats.data.merge_data(corrected, observed)
        outformat: either 'plotly' or 'plotly_html' (default plotly)
        titles: (dict) Extra info to show on the title of the plot. For example:
            {'Reach ID': 1234567, 'Drainage Area': '1000km^2'}

    Returns:
        plotly.GraphObject: plotly object, especially for use with python notebooks and the .show() method.
        When outformat is 'plotly_html', the html div produced by offline_plot is returned instead.

    Raises:
        ValueError: if outformat is neither 'plotly' nor 'plotly_html'
    """
    # Only merge what the caller did not already provide. Identity checks are used on purpose: the truth value
    # of a DataFrame is ambiguous, so `not merged_sim_obs` would raise for a real dataframe.
    if merged_sim_obs is False or merged_sim_obs is None:
        merged_sim_obs = hd.merge_data(sim_df=simulated, obs_df=observed)
    if merged_cor_obs is False or merged_cor_obs is None:
        merged_cor_obs = hd.merge_data(sim_df=corrected, obs_df=observed)

    # column 0 is plotted on the 'Simulated' axis and column 1 on the 'Observed' axis
    sim_values = merged_sim_obs.iloc[:, 0].values
    obs_values = merged_sim_obs.iloc[:, 1].values
    cor_values = merged_cor_obs.iloc[:, 0].values
    cor_obs_values = merged_cor_obs.iloc[:, 1].values

    # get the min/max values (of the original simulated/observed pairs) for plotting the 45 degree line
    min_value = min(obs_values.min(), sim_values.min())
    max_value = max(obs_values.max(), sim_values.max())

    # do a linear regression on both of the merged dataframes; only slope and intercept are needed
    original_fit = scipy.stats.linregress(sim_values, obs_values)
    slope, intercept = original_fit.slope, original_fit.intercept
    corrected_fit = scipy.stats.linregress(cor_values, cor_obs_values)
    slope2, intercept2 = corrected_fit.slope, corrected_fit.intercept

    line_x = [min_value, max_value]
    scatter_sets = [
        go.Scatter(
            x=sim_values,
            y=obs_values,
            mode='markers',
            name='Original Data',
            marker=dict(color='#ef553b')
        ),
        go.Scatter(
            x=cor_values,
            y=cor_obs_values,
            mode='markers',
            name='Corrected',
            marker=dict(color='#00cc96')
        ),
        go.Scatter(
            x=line_x,
            y=line_x,
            mode='lines',
            name='45 degree line',
            line=dict(color='black')
        ),
        go.Scatter(
            x=line_x,
            y=[slope * min_value + intercept, slope * max_value + intercept],
            mode='lines',
            name=f'Y = {round(slope, 2)}x + {round(intercept, 2)} (Original)',
            line=dict(color='red')
        ),
        go.Scatter(
            x=line_x,
            y=[slope2 * min_value + intercept2, slope2 * max_value + intercept2],
            mode='lines',
            name=f'Y = {round(slope2, 2)}x + {round(intercept2, 2)} (Corrected)',
            line=dict(color='green')
        ),
    ]

    updatemenus = [
        dict(
            active=0,
            buttons=[
                # The log button switches BOTH axes, so the linear button has to restore both of them;
                # otherwise the x axis stays logarithmic after toggling back to 'Linear Scale'.
                dict(label='Linear Scale',
                     method='update',
                     args=[{'visible': [True, True]},
                           {'title': 'Linear scale',
                            'xaxis': {'type': 'linear'},
                            'yaxis': {'type': 'linear'}}]),
                dict(label='Log Scale',
                     method='update',
                     args=[{'visible': [True, True]},
                           {'title': 'Log scale',
                            'xaxis': {'type': 'log'},
                            'yaxis': {'type': 'log'}}]),
            ]
        )
    ]

    layout = go.Layout(
        title=_build_title('Bias Correction Scatter Plot', titles),
        xaxis=dict(title='Simulated', ),
        yaxis=dict(title='Observed', autorange=True),
        showlegend=True,
        updatemenus=updatemenus,
    )

    if outformat == 'plotly':
        return go.Figure(data=scatter_sets, layout=layout)
    if outformat == 'plotly_html':
        return offline_plot(
            go.Figure(data=scatter_sets, layout=layout),
            config={'autosizable': True, 'responsive': True},
            output_type='div',
            include_plotlyjs=False
        )
    raise ValueError('Invalid outformat chosen. Choose plotly or plotly_html')