def compute_contingency_stats_from_rasters(predicted_raster_path, benchmark_raster_path, agreement_raster=None, stats_csv=None, stats_json=None, mask_values=None, stats_modes_list=None, test_id='', exclusion_mask_dict=None):
    """
    Compute contingency statistics for a predicted vs. benchmark FIM raster pair.

    This function contains FIM-specific logic to prepare raster datasets for use
    in the generic get_contingency_table_from_binary_rasters() function. It also
    calls the generic compute_stats_from_contingency_table() function and writes
    the results to CSV and/or JSON, depending on user input.

    Args:
        predicted_raster_path (str): The path to the predicted, or modeled, FIM extent raster.
        benchmark_raster_path (str): The path to the benchmark, or truth, FIM extent raster.
        agreement_raster (str): Optional. An agreement raster will be written to this path.
            0: True Negatives, 1: False Negative, 2: False Positive, 3: True Positive.
        stats_csv (str): Optional. Performance statistics will be written to per-mode CSV
            files in this path's directory. CSV allows for readability and other tabular processes.
        stats_json (str): Optional. Performance statistics will be written to per-mode JSON
            files in this path's directory. JSON allows for quick ingestion into a Python
            dictionary in other processes.
        mask_values: Optional. Passed through to get_contingency_table_from_binary_rasters().
        stats_modes_list (list): Optional. Stats modes to evaluate; defaults to ['total_area'].
        test_id (str): Test case identifier used to locate inclusion-area layers.
        exclusion_mask_dict (dict): Optional. Exclusion masks, passed through unchanged.

    Returns:
        stats_dictionary (dict): A dictionary of statistics produced by
            compute_stats_from_contingency_table(), keyed by stats mode. Statistic
            names are keys and statistic values are the values.
    """
    # BUGFIX: avoid mutable default arguments ([], {}) shared across calls.
    if stats_modes_list is None:
        stats_modes_list = ['total_area']
    if exclusion_mask_dict is None:
        exclusion_mask_dict = {}

    # Get cell size of the predicted raster from its affine transform
    # (t[0] = pixel width, t[4] = pixel height, which is typically negative).
    # Context manager ensures the dataset handle is closed.
    with rasterio.open(predicted_raster_path) as raster:
        t = raster.transform
        cell_area = abs(t[0] * t[4])

    # Create paths to additional layers. Could put conditionals here to create the
    # path according to some version; simply use stats_mode for now. Must be raster.
    additional_layers_dict = {}
    if len(stats_modes_list) > 1:
        for stats_mode in stats_modes_list:
            if stats_mode == 'total_area':
                continue
            additional_layer_path = os.path.join(TEST_CASES_DIR, test_id, 'additional_layers', 'inclusion_areas', stats_mode + '.tif')
            if os.path.exists(additional_layer_path):
                additional_layers_dict.update({stats_mode: additional_layer_path})
            else:
                print("No " + stats_mode + " inclusion area found for " + test_id + ". Moving on with processing...")

    # Get contingency table from two rasters.
    contingency_table_dictionary = get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_raster_path, agreement_raster, mask_values=mask_values, additional_layers_dict=additional_layers_dict, exclusion_mask_dict=exclusion_mask_dict)

    stats_dictionary = {}
    for stats_mode in contingency_table_dictionary:
        mode_counts = contingency_table_dictionary[stats_mode]
        true_negatives = mode_counts['true_negatives']
        false_negatives = mode_counts['false_negatives']
        false_positives = mode_counts['false_positives']
        true_positives = mode_counts['true_positives']
        masked_count = mode_counts['masked_count']

        # Produce statistics from contingency table and assign to dictionary.
        # cell_area argument optional (defaults to None).
        mode_stats_dictionary = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area, masked_count)

        # Write the mode_stats_dictionary to a per-mode CSV.
        # BUGFIX: use a local variable so the stats_csv argument is not clobbered
        # between loop iterations.
        if stats_csv is not None:
            mode_stats_csv = os.path.join(os.path.split(stats_csv)[0], stats_mode + '_stats.csv')
            df = pd.DataFrame.from_dict(mode_stats_dictionary, orient="index", columns=['value'])
            df.to_csv(mode_stats_csv)

        # Write the mode_stats_dictionary to a per-mode JSON.
        # BUGFIX: derive the JSON directory from stats_json itself; the original
        # derived it from stats_csv, which raised TypeError when stats_json was
        # supplied without stats_csv.
        if stats_json is not None:
            mode_stats_json = os.path.join(os.path.split(stats_json)[0], stats_mode + '_stats.json')
            with open(mode_stats_json, "w") as outfile:
                json.dump(mode_stats_dictionary, outfile)

        stats_dictionary.update({stats_mode: mode_stats_dictionary})

    return stats_dictionary
def aggregate_parameter_sets(huc_list_path, calibration_stat_folder, summary_file, mannings_json):
    """
    Aggregate Manning's n calibration statistics and select an optimal parameter set.

    Walks the per-HUC calibration archive (stream order / mannings value / flood
    recurrence), concatenates the per-run 'total_area_stats.csv' tables into a
    summary table, recomputes aggregate metrics per (stream order, return interval,
    mannings value) combination, then chooses the optimal Manning's n per stream
    order (preferring the 100yr event on ties/disagreement) and writes it to JSON.

    Args:
        huc_list_path (str): Path to a text file with one HUC id per line.
        calibration_stat_folder (str): Folder name of the calibration run inside
            each HUC's performance archive.
        summary_file (str): Output CSV path for the raw summary table; aggregate
            metrics are written to an 'aggregate_metrics' directory next to it.
        mannings_json (str): Output JSON path for the optimal Manning's n per
            stream order.
    """
    outfolder = os.path.dirname(summary_file)
    aggregate_output_dir = os.path.join(outfolder, 'aggregate_metrics')
    if not os.path.exists(aggregate_output_dir):
        os.makedirs(aggregate_output_dir)

    summary_columns = ['metric', 'value', 'stream_order', 'mannings_n', 'huc', 'interval']

    with open(huc_list_path) as f:
        huc_list = [huc.rstrip() for huc in f]

    # BUGFIX: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
    # and appending in a loop is quadratic. Collect per-run frames and concat once.
    summary_frames = []
    for huc in huc_list:
        branch_dir = os.path.join('data', 'test_cases', str(huc) + '_ble', 'performance_archive', 'development_versions', calibration_stat_folder)
        for stream_order in os.listdir(branch_dir):
            stream_order_dir = os.path.join(branch_dir, stream_order)
            for mannings_value in os.listdir(stream_order_dir):
                mannings_value_dir = os.path.join(stream_order_dir, mannings_value)
                for flood_recurrence in os.listdir(mannings_value_dir):
                    flood_recurrence_dir = os.path.join(mannings_value_dir, flood_recurrence)
                    total_area_stats = pd.read_csv(os.path.join(flood_recurrence_dir, 'total_area_stats.csv'), index_col=0)
                    # Keep only the count/area/CSI rows needed for aggregation.
                    total_area_stats = total_area_stats.loc[['true_positives_count', 'true_negatives_count', 'false_positives_count', 'false_negatives_count', 'masked_count', 'cell_area_m2', 'CSI'], :]
                    total_area_stats = total_area_stats.reset_index()
                    summary_frames.append(pd.DataFrame({
                        'metric': total_area_stats.iloc[:, 0],
                        'value': total_area_stats.iloc[:, 1],
                        'stream_order': stream_order,
                        'mannings_n': mannings_value,
                        'huc': huc,
                        'interval': flood_recurrence
                    }))

    if summary_frames:
        mannings_summary_table = pd.concat(summary_frames, ignore_index=True)
    else:
        mannings_summary_table = pd.DataFrame(columns=summary_columns)
    mannings_summary_table.to_csv(summary_file, index=False)

    ## calculate optimal parameter set
    from utils.shared_functions import compute_stats_from_contingency_table

    def _sum_metric(metric, stream_order, return_interval, mannings_value):
        # Sum the 'value' column over all HUCs matching one parameter combination.
        return mannings_summary_table.loc[
            (mannings_summary_table['interval'] == return_interval)
            & (mannings_summary_table['stream_order'] == stream_order)
            & (mannings_summary_table['mannings_n'] == mannings_value)
            & (mannings_summary_table['metric'] == metric), 'value'].sum()

    list_to_write = [['metric', 'value', 'stream_order', 'mannings_value', 'return_interval']]  # Initialize header.
    for stream_order in mannings_summary_table.stream_order.unique():
        for return_interval in mannings_summary_table.interval.unique():
            for mannings_value in mannings_summary_table.mannings_n.unique():
                true_positives = _sum_metric('true_positives_count', stream_order, return_interval, mannings_value)
                true_negatives = _sum_metric('true_negatives_count', stream_order, return_interval, mannings_value)
                false_positives = _sum_metric('false_positives_count', stream_order, return_interval, mannings_value)
                false_negatives = _sum_metric('false_negatives_count', stream_order, return_interval, mannings_value)
                masked_count = _sum_metric('masked_count', stream_order, return_interval, mannings_value)
                cell_area = _sum_metric('cell_area_m2', stream_order, return_interval, mannings_value)
                # Pass all sums to shared function to calculate metrics.
                stats_dict = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area=cell_area, masked_count=masked_count)
                for stat in stats_dict:
                    list_to_write.append([stat, stats_dict[stat], stream_order, mannings_value, return_interval])

    # Map path to output directory for aggregate metrics.
    output_file = os.path.join(aggregate_output_dir, 'aggregate_metrics_mannings_calibration_by_streamorder.csv')
    with open(output_file, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerows(list_to_write)
    print("Finished aggregating metrics over " + str(len(huc_list)) + " test cases.")

    print('Writing optimal mannings parameter set')
    manning_dict = {}
    list_to_write_pd = pd.read_csv(output_file)
    for stream_order in list_to_write_pd.stream_order.unique():
        # All selections below are restricted to CSI rows for this stream order.
        csi_rows = (list_to_write_pd['stream_order'] == stream_order) & (list_to_write_pd['metric'] == 'CSI')
        interval_100 = list_to_write_pd.loc[csi_rows & (list_to_write_pd['return_interval'] == '100yr'), 'value'].max()
        interval_500 = list_to_write_pd.loc[csi_rows & (list_to_write_pd['return_interval'] == '500yr'), 'value'].max()
        mannings_100yr = list_to_write_pd.loc[csi_rows & (list_to_write_pd['return_interval'] == '100yr') & (list_to_write_pd['value'] == interval_100), 'mannings_value']
        mannings_500yr = list_to_write_pd.loc[csi_rows & (list_to_write_pd['return_interval'] == '500yr') & (list_to_write_pd['value'] == interval_500), 'mannings_value']
        # BUGFIX: use boolean 'and' rather than bitwise '&' on len() comparisons.
        if (len(mannings_100yr) == 1) and (len(mannings_500yr) == 1):
            if mannings_100yr.iloc[0] == mannings_500yr.iloc[0]:
                manning_dict[str(stream_order)] = mannings_100yr.iloc[0]
            else:
                print('100yr and 500yr optimal mannings vary by ' + str(round(abs(mannings_100yr.iloc[0] - mannings_500yr.iloc[0]), 2)) + " for stream order " + str(stream_order))
                print('Selecting optimal mannings n for 100yr event')
                manning_dict[str(stream_order)] = mannings_100yr.iloc[0]
        elif (len(mannings_100yr) > 1) or (len(mannings_500yr) > 1):
            print('multiple values achieve optimal results ' + " for stream order " + str(stream_order))
            print('Selecting optimal mannings n for 100yr event')
            manning_dict[str(stream_order)] = mannings_100yr.iloc[0]

    # Fill unseen stream orders 1-14 with a default Manning's n of 0.06.
    for n in range(1, 15):
        if str(n) not in manning_dict:
            manning_dict[str(n)] = 0.06

    with open(mannings_json, "w") as outfile:
        json.dump(manning_dict, outfile)
def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfolder=""):
    """
    Aggregate per-test-case contingency stats into per-magnitude metric CSVs.

    For each magnitude, sums the contingency counts from every test case's
    'total_area_stats.json' (optionally restricted to a HUC list), recomputes
    aggregate metrics with compute_stats_from_contingency_table(), and writes a
    CSV per magnitude under an 'aggregate_metrics' output directory.

    Args:
        config (str): "DEV" (development_versions) or "PREV" (previous_versions).
        branch (str): Branch name within each test case's performance archive.
        hucs (str): Optional path to a text file of HUC ids to include; empty
            string means include all.
        special_string (str): Optional suffix appended to output names.
        outfolder (str): Root directory for the aggregate output.

    Raises:
        ValueError: If config is neither "DEV" nor "PREV".
    """
    # Read hucs into list.
    if hucs != "":
        with open(hucs) as hucs_file:
            huc_list = [line.rstrip('\n') for line in hucs_file]
    else:
        huc_list = None

    if config == "DEV":
        config_version = "development_versions"
    elif config == "PREV":
        config_version = "previous_versions"
    else:
        # BUGFIX: any other config value previously left config_version unbound,
        # causing a NameError deep inside the aggregation loop. Fail fast instead.
        raise ValueError("config must be 'DEV' or 'PREV', got: " + str(config))

    # Make directory to store output aggregates.
    if special_string != "":
        special_string = "_" + special_string
    aggregate_output_dir = os.path.join(outfolder, 'aggregate_metrics', branch + '_aggregate_metrics' + special_string)
    if not os.path.exists(aggregate_output_dir):
        os.makedirs(aggregate_output_dir)

    test_cases_dir_list = os.listdir(TEST_CASES_DIR)

    for magnitude in ['100yr', '500yr', 'action', 'minor', 'moderate', 'major']:
        huc_path_list = [['huc', 'path']]
        true_positives, true_negatives, false_positives, false_negatives, cell_area, masked_count = 0, 0, 0, 0, 0, 0
        for test_case in test_cases_dir_list:
            # Skip the non-test-case directories in TEST_CASES_DIR.
            if test_case in ['other', 'validation_data_ble', 'validation_data_legacy', 'validation_data_ahps']:
                continue
            branch_results_dir = os.path.join(TEST_CASES_DIR, test_case, 'performance_archive', config_version, branch)
            huc = test_case.split('_')[0]
            # Check that the huc is in the list of hucs to aggregate.
            if huc_list is not None and huc not in huc_list:
                continue
            stats_json_path = os.path.join(branch_results_dir, magnitude, 'total_area_stats.json')
            # If there is a stats json for the test case and branch name, use it when aggregating stats.
            if os.path.exists(stats_json_path):
                # BUGFIX: close the stats file; json.load(open(...)) leaked the handle.
                with open(stats_json_path) as stats_file:
                    json_dict = json.load(stats_file)
                true_positives += json_dict['true_positives_count']
                true_negatives += json_dict['true_negatives_count']
                false_positives += json_dict['false_positives_count']
                false_negatives += json_dict['false_negatives_count']
                masked_count += json_dict['masked_count']
                # NOTE(review): cell_area is assigned (not summed) — presumably
                # the cell area is identical across test cases; confirm upstream.
                cell_area = json_dict['cell_area_m2']
                huc_path_list.append([huc, stats_json_path])

        # No stats found for this magnitude; nothing to aggregate or write.
        if cell_area == 0:
            continue

        # Pass all sums to shared function to calculate metrics.
        stats_dict = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area=cell_area, masked_count=masked_count)

        list_to_write = [['metric', 'value']]  # Initialize header.
        for stat in stats_dict:
            list_to_write.append([stat, stats_dict[stat]])

        # Map path to output directory for aggregate metrics.
        output_file = os.path.join(aggregate_output_dir, branch + '_aggregate_metrics_' + magnitude + special_string + '.csv')
        if cell_area != 0:
            with open(output_file, 'w', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)
                csv_writer.writerows(list_to_write)
                csv_writer.writerow([])
                csv_writer.writerows(huc_path_list)
            print()
            print("Finished aggregating for the '" + magnitude + "' magnitude. Aggregated metrics over " + str(len(huc_path_list) - 1) + " test cases.")
            print()
            print("Results are at: " + output_file)
            print()