Example #1
def compute_contingency_stats_from_rasters(predicted_raster_path, benchmark_raster_path, agreement_raster=None, stats_csv=None, stats_json=None, mask_values=None, stats_modes_list=['total_area'], test_id='', exclusion_mask_dict={}):
    """
    This function contains FIM-specific logic to prepare raster datasets for use in the generic get_contingency_table_from_binary_rasters() function.
    This function also calls the generic compute_stats_from_contingency_table() function and writes the results to CSV and/or JSON, depending on user input.

    Args:
        predicted_raster_path (str): The path to the predicted, or modeled, FIM extent raster.
        benchmark_raster_path (str): The path to the benchmark, or truth, FIM extent raster.
        agreement_raster (str): Optional. An agreement raster will be written to this path. 0: True Negatives, 1: False Negative, 2: False Positive, 3: True Positive.
        stats_csv (str): Optional. Performance statistics will be written to this path. CSV allows for readability and other tabular processes.
        stats_json (str): Optional. Performance statistics will be written to this path. JSON allows for quick ingestion into Python dictionary in other processes.

    Returns:
        stats_dictionary (dict): A dictionary of statistics produced by compute_stats_from_contingency_table(). Statistic names are keys and statistic values are the values.
    """

    # Get cell size of the predicted raster.
    with rasterio.open(predicted_raster_path) as raster:
        t = raster.transform
        cell_x = t[0]
        cell_y = t[4]
        cell_area = abs(cell_x * cell_y)

    additional_layers_dict = {}
    # Create path to additional_layer. Could put conditionals here to create path according to some version. Simply use stats_mode for now. Must be raster.
    if len(stats_modes_list) > 1:
        for stats_mode in stats_modes_list:
            if stats_mode != 'total_area':
                additional_layer_path = os.path.join(TEST_CASES_DIR, test_id, 'additional_layers', 'inclusion_areas', stats_mode + '.tif')
                if os.path.exists(additional_layer_path):
                    additional_layers_dict.update({stats_mode: additional_layer_path})
                else:
                    print("No " + stats_mode + " inclusion area found for " + test_id + ". Moving on with processing...")

    # Get contingency table from two rasters.
    contingency_table_dictionary = get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_raster_path, agreement_raster, mask_values=mask_values, additional_layers_dict=additional_layers_dict, exclusion_mask_dict=exclusion_mask_dict)

    stats_dictionary = {}

    for stats_mode in contingency_table_dictionary:
        true_negatives = contingency_table_dictionary[stats_mode]['true_negatives']
        false_negatives = contingency_table_dictionary[stats_mode]['false_negatives']
        false_positives = contingency_table_dictionary[stats_mode]['false_positives']
        true_positives = contingency_table_dictionary[stats_mode]['true_positives']
        masked_count = contingency_table_dictionary[stats_mode]['masked_count']

        # Produce statistics from contingency table and assign to dictionary. cell_area argument optional (defaults to None).
        mode_stats_dictionary = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area, masked_count)

        # Write the mode_stats_dictionary to the stats_csv.
        if stats_csv is not None:
            mode_stats_csv = os.path.join(os.path.split(stats_csv)[0], stats_mode + '_stats.csv')
            df = pd.DataFrame.from_dict(mode_stats_dictionary, orient="index", columns=['value'])
            df.to_csv(mode_stats_csv)

        # Write the mode_stats_dictionary to the stats_json.
        if stats_json is not None:
            mode_stats_json = os.path.join(os.path.split(stats_json)[0], stats_mode + '_stats.json')
            with open(mode_stats_json, "w") as outfile:
                json.dump(mode_stats_dictionary, outfile)

        stats_dictionary.update({stats_mode: mode_stats_dictionary})

    return stats_dictionary
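
A minimal usage sketch for the function above, assuming the module-level imports and helper functions used in this example are available. All file paths and the test_id below are hypothetical placeholders; the returned dictionary is keyed by stats mode (e.g. 'total_area'), as the final loop above shows.

# Hypothetical paths and test_id; a sketch only, not from the source repository.
stats = compute_contingency_stats_from_rasters(
    predicted_raster_path='/data/outputs/inundation_extent.tif',    # modeled FIM extent
    benchmark_raster_path='/data/validation/benchmark_extent.tif',  # truth FIM extent
    agreement_raster='/data/outputs/agreement.tif',                 # 0=TN, 1=FN, 2=FP, 3=TP
    stats_csv='/data/outputs/stats.csv',
    stats_json='/data/outputs/stats.json',
    stats_modes_list=['total_area'],
    test_id='12345678_ble')

# Each mode maps to the metrics produced by compute_stats_from_contingency_table().
print(stats['total_area'].get('CSI'))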
Example #2
def aggregate_parameter_sets(huc_list_path, calibration_stat_folder,
                             summary_file, mannings_json):

    outfolder = os.path.dirname(summary_file)
    aggregate_output_dir = os.path.join(outfolder, 'aggregate_metrics')

    if not os.path.exists(aggregate_output_dir):
        os.makedirs(aggregate_output_dir)

    mannings_summary_table = pd.DataFrame(columns=[
        'metric', 'value', 'stream_order', 'mannings_n', 'huc', 'interval'
    ])

    with open(huc_list_path) as f:
        huc_list = [huc.rstrip() for huc in f]

    for huc in huc_list:
        branch_dir = os.path.join('data', 'test_cases',
                                  str(huc) + '_ble', 'performance_archive',
                                  'development_versions',
                                  calibration_stat_folder)
        for stream_order in os.listdir(branch_dir):
            stream_order_dir = os.path.join(branch_dir, stream_order)
            for mannings_value in os.listdir(stream_order_dir):
                mannings_value_dir = os.path.join(stream_order_dir,
                                                  mannings_value)
                for flood_recurrence in os.listdir(mannings_value_dir):
                    flood_recurrence_dir = os.path.join(
                        mannings_value_dir, flood_recurrence)
                    total_area_stats = pd.read_csv(os.path.join(
                        flood_recurrence_dir, 'total_area_stats.csv'),
                                                   index_col=0)
                    total_area_stats = total_area_stats.loc[[
                        'true_positives_count', 'true_negatives_count',
                        'false_positives_count', 'false_negatives_count',
                        'masked_count', 'cell_area_m2', 'CSI'
                    ], :]
                    total_area_stats = total_area_stats.reset_index()
                    total_area_stats_table = pd.DataFrame({
                        'metric': total_area_stats.iloc[:, 0],
                        'value': total_area_stats.iloc[:, 1],
                        'stream_order': stream_order,
                        'mannings_n': mannings_value,
                        'huc': huc,
                        'interval': flood_recurrence
                    })
                    mannings_summary_table = pd.concat(
                        [mannings_summary_table, total_area_stats_table],
                        ignore_index=True)

    mannings_summary_table.to_csv(summary_file, index=False)

    ## calculate optimal parameter set
    from utils.shared_functions import compute_stats_from_contingency_table

    true_positives, true_negatives, false_positives, false_negatives, cell_area, masked_count = 0, 0, 0, 0, 0, 0

    list_to_write = [[
        'metric', 'value', 'stream_order', 'mannings_value', 'return_interval'
    ]]  # Initialize header.
    for stream_order in mannings_summary_table.stream_order.unique():
        for return_interval in mannings_summary_table.interval.unique():
            for mannings_value in mannings_summary_table.mannings_n.unique():
                true_positives = mannings_summary_table.loc[
                    (mannings_summary_table['interval'] == return_interval) &
                    (mannings_summary_table['stream_order'] == stream_order) &
                    (mannings_summary_table['mannings_n'] == mannings_value) &
                    (mannings_summary_table['metric'] == 'true_positives_count'
                     ), 'value'].sum()
                true_negatives = mannings_summary_table.loc[
                    (mannings_summary_table['interval'] == return_interval) &
                    (mannings_summary_table['stream_order'] == stream_order) &
                    (mannings_summary_table['mannings_n'] == mannings_value) &
                    (mannings_summary_table['metric'] == 'true_negatives_count'
                     ), 'value'].sum()
                false_positives = mannings_summary_table.loc[
                    (mannings_summary_table['interval'] == return_interval) &
                    (mannings_summary_table['stream_order'] == stream_order) &
                    (mannings_summary_table['mannings_n'] == mannings_value) &
                    (mannings_summary_table['metric'] ==
                     'false_positives_count'), 'value'].sum()
                false_negatives = mannings_summary_table.loc[
                    (mannings_summary_table['interval'] == return_interval) &
                    (mannings_summary_table['stream_order'] == stream_order) &
                    (mannings_summary_table['mannings_n'] == mannings_value) &
                    (mannings_summary_table['metric'] ==
                     'false_negatives_count'), 'value'].sum()
                masked_count = mannings_summary_table.loc[
                    (mannings_summary_table['interval'] == return_interval) &
                    (mannings_summary_table['stream_order'] == stream_order) &
                    (mannings_summary_table['mannings_n'] == mannings_value) &
                    (mannings_summary_table['metric'] == 'masked_count'),
                    'value'].sum()

                cell_area = mannings_summary_table.loc[
                    (mannings_summary_table['interval'] == return_interval) &
                    (mannings_summary_table['stream_order'] == stream_order) &
                    (mannings_summary_table['mannings_n'] == mannings_value) &
                    (mannings_summary_table['metric'] == 'cell_area_m2'),
                    'value'].sum()

                # Pass all sums to shared function to calculate metrics.
                stats_dict = compute_stats_from_contingency_table(
                    true_negatives,
                    false_negatives,
                    false_positives,
                    true_positives,
                    cell_area=cell_area,
                    masked_count=masked_count)

                for stat in stats_dict:
                    list_to_write.append([
                        stat, stats_dict[stat], stream_order, mannings_value,
                        return_interval
                    ])

    # Map path to output directory for aggregate metrics.
    output_file = os.path.join(
        aggregate_output_dir,
        'aggregate_metrics_mannings_calibration_by_streamorder.csv')

    with open(output_file, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerows(list_to_write)

    print("Finished aggregating metrics over " + str(len(huc_list)) +
          " test cases.")

    print('Writing optimal mannings parameter set')

    manning_dict = {}
    list_to_write_pd = pd.read_csv(output_file)
    for stream_order in list_to_write_pd.stream_order.unique():
        interval_100 = list_to_write_pd.loc[
            (list_to_write_pd['stream_order'] == stream_order) &
            (list_to_write_pd['metric'] == 'CSI') &
            (list_to_write_pd['return_interval'] == '100yr'), 'value'].max()
        interval_500 = list_to_write_pd.loc[
            (list_to_write_pd['stream_order'] == stream_order) &
            (list_to_write_pd['metric'] == 'CSI') &
            (list_to_write_pd['return_interval'] == '500yr'), 'value'].max()
        mannings_100yr = list_to_write_pd.loc[
            (list_to_write_pd['stream_order'] == stream_order) &
            (list_to_write_pd['metric'] == 'CSI') &
            (list_to_write_pd['return_interval'] == '100yr') &
            (list_to_write_pd['value'] == interval_100), 'mannings_value']
        mannings_500yr = list_to_write_pd.loc[
            (list_to_write_pd['stream_order'] == stream_order) &
            (list_to_write_pd['metric'] == 'CSI') &
            (list_to_write_pd['return_interval'] == '500yr') &
            (list_to_write_pd['value'] == interval_500), 'mannings_value']
        if (len(mannings_100yr) == 1) and (len(mannings_500yr) == 1):
            if mannings_100yr.iloc[0] == mannings_500yr.iloc[0]:
                manning_dict[str(stream_order)] = mannings_100yr.iloc[0]
            else:
                print('100yr and 500yr optimal mannings vary by ' + str(
                    round(abs(mannings_100yr.iloc[0] -
                              mannings_500yr.iloc[0]), 2)) +
                      " for stream order " + str(stream_order))
                print('Selecting optimal mannings n for 100yr event')
                manning_dict[str(stream_order)] = mannings_100yr.iloc[0]
        elif (len(mannings_100yr) > 1) or (len(mannings_500yr) > 1):
            print('multiple values achieve optimal results ' +
                  " for stream order " + str(stream_order))
            print('Selecting optimal mannings n for 100yr event')
            manning_dict[str(stream_order)] = mannings_100yr.iloc[0]

    for n in range(1, 15):
        if str(n) not in manning_dict:
            manning_dict[str(n)] = 0.06

    with open(mannings_json, "w") as outfile:
        json.dump(manning_dict, outfile)
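
The nested loops above ultimately just sum contingency-table counts per stream order, Manning's n, and return interval, then pass those sums to compute_stats_from_contingency_table(). Below is a stripped-down sketch of that final step with invented counts; the cell area value and the presence of a 'CSI' metric are assumptions based on the columns read from total_area_stats.csv.

# Invented counts for illustration; only the call pattern mirrors the loop above.
true_negatives, false_negatives = 1_000_000, 2_500
false_positives, true_positives = 3_000, 45_000

stats_dict = compute_stats_from_contingency_table(
    true_negatives,
    false_negatives,
    false_positives,
    true_positives,
    cell_area=100.0,     # square meters per cell, as in the 'cell_area_m2' metric
    masked_count=1_200)

for metric, value in stats_dict.items():  # e.g. 'CSI' among the returned metrics
    print(metric, value)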
Example #3
def aggregate_metrics(config="DEV",
                      branch="",
                      hucs="",
                      special_string="",
                      outfolder=""):

    # Read hucs into list.
    if hucs != "":
        with open(hucs) as hucs_file:
            huc_list = [line.rstrip('\n') for line in hucs_file]

    else:
        huc_list = None

    if config == "DEV":
        config_version = "development_versions"
    elif config == "PREV":
        config_version = "previous_versions"

    # Make directory to store output aggregates.
    if special_string != "":
        special_string = "_" + special_string
    aggregate_output_dir = os.path.join(
        outfolder, 'aggregate_metrics',
        branch + '_aggregate_metrics' + special_string)
    if not os.path.exists(aggregate_output_dir):
        os.makedirs(aggregate_output_dir)

    test_cases_dir_list = os.listdir(TEST_CASES_DIR)

    for magnitude in [
            '100yr', '500yr', 'action', 'minor', 'moderate', 'major'
    ]:
        huc_path_list = [['huc', 'path']]
        true_positives, true_negatives, false_positives, false_negatives, cell_area, masked_count = 0, 0, 0, 0, 0, 0

        for test_case in test_cases_dir_list:

            if test_case not in [
                    'other', 'validation_data_ble', 'validation_data_legacy',
                    'validation_data_ahps'
            ]:
                branch_results_dir = os.path.join(TEST_CASES_DIR, test_case,
                                                  'performance_archive',
                                                  config_version, branch)

                huc = test_case.split('_')[0]
                # Check that the huc is in the list of hucs to aggregate.
                if huc_list is not None and huc not in huc_list:
                    continue

                stats_json_path = os.path.join(branch_results_dir, magnitude,
                                               'total_area_stats.json')

                # If there is a stats json for the test case and branch name, use it when aggregating stats.
                if os.path.exists(stats_json_path):
                    with open(stats_json_path) as stats_json_file:
                        json_dict = json.load(stats_json_file)

                    true_positives += json_dict['true_positives_count']
                    true_negatives += json_dict['true_negatives_count']
                    false_positives += json_dict['false_positives_count']
                    false_negatives += json_dict['false_negatives_count']
                    masked_count += json_dict['masked_count']

                    cell_area = json_dict['cell_area_m2']

                    huc_path_list.append([huc, stats_json_path])

            if cell_area == 0:
                continue

            # Pass all sums to shared function to calculate metrics.
            stats_dict = compute_stats_from_contingency_table(
                true_negatives,
                false_negatives,
                false_positives,
                true_positives,
                cell_area=cell_area,
                masked_count=masked_count)

            list_to_write = [['metric', 'value']]  # Initialize header.

            for stat in stats_dict:
                list_to_write.append([stat, stats_dict[stat]])

            # Map path to output directory for aggregate metrics.
            output_file = os.path.join(
                aggregate_output_dir, branch + '_aggregate_metrics_' +
                magnitude + special_string + '.csv')

        if cell_area != 0:
            with open(output_file, 'w', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)
                csv_writer.writerows(list_to_write)
                csv_writer.writerow([])
                csv_writer.writerows(huc_path_list)

            print()
            print("Finished aggregating for the '" + magnitude +
                  "' magnitude. Aggregated metrics over " +
                  str(len(huc_path_list) - 1) + " test cases.")
            print()
            print("Results are at: " + output_file)
            print()
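
A possible invocation of the aggregator above. The branch name, HUC list path, and output folder are placeholders, and TEST_CASES_DIR is assumed to be a module-level constant as in Example #1.

# Hypothetical call; paths and branch name are placeholders, not from the source.
aggregate_metrics(config="DEV",
                  branch="my_feature_branch",
                  hucs="/data/inputs/included_huc8.lst",
                  special_string="calibrated",
                  outfolder="/data/outputs")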