Python subset_by_index примеры использования

Язык программирования: Python

Пространство имен/Пакет: ml4rt.io.prediction_io

Метод/Функция: subset_by_index

Примеров на hotexamples.com: 6

Python subset_by_index - 6 примеров найдено. Это лучшие примеры Python кода для ml4rt.io.prediction_io.subset_by_index, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: split_predictions_by_site.py Проект: thunderhoser/ml4rt

def _run(input_file_name, top_output_dir_name):
    """Splits predictions by site (point location).

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param top_output_dir_name: Same.
    :raises: ValueError: if any example cannot be assigned to a site.
    """

    # Read data.
    print('Reading data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)
    example_metadata_dict = example_utils.parse_example_ids(
        prediction_dict[prediction_io.EXAMPLE_IDS_KEY])

    example_latitudes_deg_n = number_rounding.round_to_nearest(
        example_metadata_dict[example_utils.LATITUDES_KEY],
        LATLNG_TOLERANCE_DEG)
    example_longitudes_deg_e = number_rounding.round_to_nearest(
        example_metadata_dict[example_utils.LONGITUDES_KEY],
        LATLNG_TOLERANCE_DEG)
    example_longitudes_deg_e = lng_conversion.convert_lng_positive_in_west(
        example_longitudes_deg_e)

    num_examples = len(example_latitudes_deg_n)
    example_written_flags = numpy.full(num_examples, False, dtype=bool)

    site_names = list(SITE_NAME_TO_LATLNG.keys())
    num_sites = len(site_names)

    for j in range(num_sites):
        this_site_latitude_deg_n = SITE_NAME_TO_LATLNG[site_names[j]][0]
        this_site_longitude_deg_e = SITE_NAME_TO_LATLNG[site_names[j]][1]

        these_indices = numpy.where(
            numpy.logical_and(
                numpy.absolute(example_latitudes_deg_n -
                               this_site_latitude_deg_n) <=
                LATLNG_TOLERANCE_DEG,
                numpy.absolute(example_longitudes_deg_e -
                               this_site_longitude_deg_e) <=
                LATLNG_TOLERANCE_DEG))[0]

        this_prediction_dict = prediction_io.subset_by_index(
            prediction_dict=copy.deepcopy(prediction_dict),
            desired_indices=these_indices)

        this_output_file_name = '{0:s}/{1:s}/predictions.nc'.format(
            top_output_dir_name, site_names[j])
        print('Writing {0:d} examples to: "{1:s}"...'.format(
            len(these_indices), this_output_file_name))

        if len(these_indices) == 0:
            continue

        example_written_flags[these_indices] = True

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])

    if numpy.all(example_written_flags):
        return

    # bad_latitudes_deg_n = (
    #     example_latitudes_deg_n[example_written_flags == False]
    # )
    # bad_longitudes_deg_e = (
    #     example_longitudes_deg_e[example_written_flags == False]
    # )
    # bad_coord_matrix = numpy.transpose(numpy.vstack((
    #     bad_latitudes_deg_n, bad_longitudes_deg_e
    # )))
    # bad_coord_matrix = numpy.unique(bad_coord_matrix, axis=0)
    # print(bad_coord_matrix)

    error_string = (
        '{0:d} of {1:d} examples could not be assigned to a site.  This is a '
        'BIG PROBLEM.').format(numpy.sum(example_written_flags == False),
                               num_examples)

    raise ValueError(error_string)

Пример #2

Показать файл

def _run(input_file_name, min_latitude_deg, max_latitude_deg, min_longitude_deg,
         max_longitude_deg, latitude_spacing_deg, longitude_spacing_deg,
         output_dir_name):
    """Splits predictions by spatial region.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param min_latitude_deg: Same.
    :param max_latitude_deg: Same.
    :param min_longitude_deg: Same.
    :param max_longitude_deg: Same.
    :param latitude_spacing_deg: Same.
    :param longitude_spacing_deg: Same.
    :param output_dir_name: Same.
    """

    # Read data.
    print('Reading data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)
    example_metadata_dict = example_utils.parse_example_ids(
        prediction_dict[prediction_io.EXAMPLE_IDS_KEY]
    )

    example_latitudes_deg = example_metadata_dict[example_utils.LATITUDES_KEY]
    example_longitudes_deg = example_metadata_dict[example_utils.LONGITUDES_KEY]

    these_limits_deg = numpy.array([
        min_latitude_deg, max_latitude_deg, min_longitude_deg, max_longitude_deg
    ])
    if numpy.any(numpy.isnan(these_limits_deg)):
        min_latitude_deg = numpy.min(example_latitudes_deg)
        max_latitude_deg = numpy.max(example_latitudes_deg)
        min_longitude_deg = numpy.min(example_longitudes_deg)
        max_longitude_deg = numpy.max(example_longitudes_deg)

    # Create grid.
    grid_point_latitudes_deg, grid_point_longitudes_deg = (
        misc.create_latlng_grid(
            min_latitude_deg=min_latitude_deg,
            max_latitude_deg=max_latitude_deg,
            latitude_spacing_deg=latitude_spacing_deg,
            min_longitude_deg=min_longitude_deg,
            max_longitude_deg=max_longitude_deg,
            longitude_spacing_deg=longitude_spacing_deg
        )
    )

    num_grid_rows = len(grid_point_latitudes_deg)
    num_grid_columns = len(grid_point_longitudes_deg)

    grid_edge_latitudes_deg, grid_edge_longitudes_deg = (
        grids.get_latlng_grid_cell_edges(
            min_latitude_deg=grid_point_latitudes_deg[0],
            min_longitude_deg=grid_point_longitudes_deg[0],
            lat_spacing_deg=numpy.diff(grid_point_latitudes_deg[:2])[0],
            lng_spacing_deg=numpy.diff(grid_point_longitudes_deg[:2])[0],
            num_rows=num_grid_rows, num_columns=num_grid_columns
        )
    )

    print(SEPARATOR_STRING)

    for i in range(num_grid_rows):
        for j in range(num_grid_columns):
            these_indices = grids.find_events_in_grid_cell(
                event_x_coords_metres=example_longitudes_deg,
                event_y_coords_metres=example_latitudes_deg,
                grid_edge_x_coords_metres=grid_edge_longitudes_deg,
                grid_edge_y_coords_metres=grid_edge_latitudes_deg,
                row_index=i, column_index=j, verbose=False
            )

            this_prediction_dict = prediction_io.subset_by_index(
                prediction_dict=copy.deepcopy(prediction_dict),
                desired_indices=these_indices
            )
            this_num_examples = len(
                this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]
            )

            if this_num_examples == 0:
                continue

            this_output_file_name = prediction_io.find_file(
                directory_name=output_dir_name, grid_row=i, grid_column=j,
                raise_error_if_missing=False
            )
            print('Writing {0:d} examples to: "{1:s}"...'.format(
                len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
                this_output_file_name
            ))

            prediction_io.write_file(
                netcdf_file_name=this_output_file_name,
                scalar_target_matrix=
                this_prediction_dict[prediction_io.SCALAR_TARGETS_KEY],
                vector_target_matrix=
                this_prediction_dict[prediction_io.VECTOR_TARGETS_KEY],
                scalar_prediction_matrix=
                this_prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY],
                vector_prediction_matrix=
                this_prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY],
                heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
                example_id_strings=
                this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
                model_file_name=
                this_prediction_dict[prediction_io.MODEL_FILE_KEY]
            )

    print(SEPARATOR_STRING)

    grid_metafile_name = prediction_io.find_grid_metafile(
        prediction_dir_name=output_dir_name, raise_error_if_missing=False
    )

    print('Writing grid metadata to: "{0:s}"...'.format(grid_metafile_name))
    prediction_io.write_grid_metafile(
        grid_point_latitudes_deg=grid_point_latitudes_deg,
        grid_point_longitudes_deg=grid_point_longitudes_deg,
        netcdf_file_name=grid_metafile_name
    )

Пример #3

Показать файл

def _run(input_prediction_file_name, num_examples_per_set, output_dir_name):
    """Finds best and worst heating-rate predictions.

    This is effectively the main method.

    :param input_prediction_file_name: See documentation at top of file.
    :param num_examples_per_set: Same.
    :param output_dir_name: Same.
    """

    error_checking.assert_is_greater(num_examples_per_set, 0)

    print('Reading data from: "{0:s}"...'.format(input_prediction_file_name))
    prediction_dict = prediction_io.read_file(input_prediction_file_name)

    model_file_name = prediction_dict[prediction_io.MODEL_FILE_KEY]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0])

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]

    scalar_target_names = (
        generator_option_dict[neural_net.SCALAR_TARGET_NAMES_KEY])
    down_index = scalar_target_names.index(
        example_utils.SHORTWAVE_SURFACE_DOWN_FLUX_NAME)
    up_index = scalar_target_names.index(
        example_utils.SHORTWAVE_TOA_UP_FLUX_NAME)

    targets_w_m02 = (
        prediction_dict[prediction_io.SCALAR_TARGETS_KEY][..., down_index] -
        prediction_dict[prediction_io.SCALAR_TARGETS_KEY][..., up_index])
    predictions_w_m02 = (
        prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY][..., down_index]
        - prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY][..., up_index])

    biases_w_m02 = predictions_w_m02 - targets_w_m02
    bias_matrix = numpy.expand_dims(biases_w_m02, axis=1)

    print(SEPARATOR_STRING)
    high_bias_indices, low_bias_indices, low_abs_error_indices = (
        misc_utils.find_best_and_worst_predictions(
            bias_matrix=bias_matrix,
            absolute_error_matrix=numpy.absolute(bias_matrix),
            num_examples_per_set=num_examples_per_set))
    print(SEPARATOR_STRING)

    high_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=high_bias_indices)
    high_bias_file_name = (
        '{0:s}/predictions_high-bias.nc'.format(output_dir_name))

    print('Writing examples with greatest positive bias to: "{0:s}"...'.format(
        high_bias_file_name))
    prediction_io.write_file(
        netcdf_file_name=high_bias_file_name,
        scalar_target_matrix=high_bias_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=high_bias_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=high_bias_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=high_bias_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=high_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=high_bias_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=high_bias_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

    low_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_bias_indices)
    low_bias_file_name = (
        '{0:s}/predictions_low-bias.nc'.format(output_dir_name))

    print('Writing examples with greatest negative bias to: "{0:s}"...'.format(
        low_bias_file_name))
    prediction_io.write_file(
        netcdf_file_name=low_bias_file_name,
        scalar_target_matrix=low_bias_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=low_bias_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=low_bias_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=low_bias_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=low_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=low_bias_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=low_bias_prediction_dict[prediction_io.MODEL_FILE_KEY])

    low_abs_error_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_abs_error_indices)
    low_abs_error_file_name = (
        '{0:s}/predictions_low-absolute-error.nc'.format(output_dir_name))

    print(
        'Writing examples with smallest absolute error to: "{0:s}"...'.format(
            low_abs_error_file_name))
    prediction_io.write_file(
        netcdf_file_name=low_abs_error_file_name,
        scalar_target_matrix=low_abs_error_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=low_abs_error_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=low_abs_error_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=low_abs_error_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=low_abs_error_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=low_abs_error_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=low_abs_error_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

    sort_indices = numpy.argsort(-1 * targets_w_m02)
    large_net_flux_indices = sort_indices[:num_examples_per_set]

    large_net_flux_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=large_net_flux_indices)
    large_net_flux_file_name = (
        '{0:s}/predictions_large-net-flux.nc'.format(output_dir_name))

    print('Writing examples with greatest net flux to: "{0:s}"...'.format(
        large_net_flux_file_name))
    prediction_io.write_file(
        netcdf_file_name=large_net_flux_file_name,
        scalar_target_matrix=large_net_flux_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=large_net_flux_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=large_net_flux_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=large_net_flux_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=large_net_flux_prediction_dict[
            prediction_io.HEIGHTS_KEY],
        example_id_strings=large_net_flux_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=large_net_flux_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

    sort_indices = numpy.argsort(targets_w_m02)
    small_net_flux_indices = sort_indices[:num_examples_per_set]

    small_net_flux_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=small_net_flux_indices)
    small_net_flux_file_name = (
        '{0:s}/predictions_small-net-flux.nc'.format(output_dir_name))

    print('Writing examples with smallest net flux to: "{0:s}"...'.format(
        small_net_flux_file_name))
    prediction_io.write_file(
        netcdf_file_name=small_net_flux_file_name,
        scalar_target_matrix=small_net_flux_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=small_net_flux_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=small_net_flux_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=small_net_flux_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=small_net_flux_prediction_dict[
            prediction_io.HEIGHTS_KEY],
        example_id_strings=small_net_flux_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=small_net_flux_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

Пример #4

Показать файл

Файл: plot_comparisons.py Проект: thunderhoser/ml4rt

def _run(prediction_file_name, num_examples, example_dir_name, use_log_scale,
         output_dir_name):
    """Plots comparisons between predicted and actual (target) profiles.

    This is effectively the main method.

    :param prediction_file_name: See documentation at top of file.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param use_log_scale: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    if num_examples < 1:
        num_examples = None
    if example_dir_name == '':
        example_dir_name = None

    print(('Reading predicted and actual (target) profiles from: "{0:s}"...'
           ).format(prediction_file_name))

    prediction_dict = prediction_io.read_file(prediction_file_name)
    num_examples_orig = len(prediction_dict[prediction_io.EXAMPLE_IDS_KEY])

    if num_examples is not None and num_examples < num_examples_orig:
        desired_indices = numpy.linspace(0,
                                         num_examples - 1,
                                         num=num_examples,
                                         dtype=int)
        prediction_dict = prediction_io.subset_by_index(
            prediction_dict=prediction_dict, desired_indices=desired_indices)

    vector_target_matrix = prediction_dict[prediction_io.VECTOR_TARGETS_KEY]
    vector_prediction_matrix = (
        prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY])
    scalar_target_matrix = prediction_dict[prediction_io.SCALAR_TARGETS_KEY]
    scalar_prediction_matrix = (
        prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY])

    model_file_name = prediction_dict[prediction_io.MODEL_FILE_KEY]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True)

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    model_metadata_dict[neural_net.TRAINING_OPTIONS_KEY][
        neural_net.HEIGHTS_KEY] = prediction_dict[prediction_io.HEIGHTS_KEY]

    # If necessary, convert flux increments to fluxes.
    vector_target_matrix, vector_prediction_matrix, model_metadata_dict = (
        _fluxes_increments_to_actual(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            model_metadata_dict=model_metadata_dict))

    # If necessary, convert fluxes to heating rates.
    vector_target_matrix, vector_prediction_matrix, model_metadata_dict = (
        _fluxes_to_heating_rate(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            model_metadata_dict=model_metadata_dict,
            prediction_file_name=prediction_file_name,
            example_dir_name=example_dir_name))

    # If data include both upwelling and downwelling fluxes, remove flux
    # increments (they need not be plotted).
    vector_target_matrix, vector_prediction_matrix, model_metadata_dict = (
        _remove_flux_increments(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            model_metadata_dict=model_metadata_dict))

    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]
    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY])
    plot_fancy = all(
        [t in vector_target_names for t in DEFAULT_VECTOR_TARGET_NAMES])

    if plot_fancy:
        _plot_comparisons_fancy(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            example_id_strings=prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
            model_metadata_dict=model_metadata_dict,
            use_log_scale=use_log_scale,
            output_dir_name=output_dir_name)
    else:
        title_strings = _get_flux_strings(
            scalar_target_matrix=scalar_target_matrix,
            scalar_prediction_matrix=scalar_prediction_matrix,
            model_metadata_dict=model_metadata_dict)

        _plot_comparisons_simple(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            example_id_strings=prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
            model_metadata_dict=model_metadata_dict,
            use_log_scale=use_log_scale,
            title_strings=title_strings,
            output_dir_name=output_dir_name)

Пример #5

Показать файл

Файл: find_extreme_heating_rates.py Проект: thunderhoser/ml4rt

def _run(input_prediction_file_name, average_over_height, scale_by_climo,
         num_examples_per_set, output_dir_name):
    """Finds best and worst heating-rate predictions.

    This is effectively the main method.

    :param input_prediction_file_name: See documentation at top of file.
    :param average_over_height: Same.
    :param scale_by_climo: Same.
    :param num_examples_per_set: Same.
    :param output_dir_name: Same.
    """

    # TODO(thunderhoser): Maybe allow specific height again (e.g., 15 km).

    error_checking.assert_is_greater(num_examples_per_set, 0)
    scale_by_climo = scale_by_climo and not average_over_height

    print('Reading data from: "{0:s}"...'.format(input_prediction_file_name))
    prediction_dict = prediction_io.read_file(input_prediction_file_name)

    model_file_name = prediction_dict[prediction_io.MODEL_FILE_KEY]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0])

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]

    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY])
    hr_index = (vector_target_names.index(
        example_utils.SHORTWAVE_HEATING_RATE_NAME))

    target_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_TARGETS_KEY][..., hr_index])
    prediction_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY][..., hr_index])

    bias_matrix = prediction_matrix_k_day01 - target_matrix_k_day01
    absolute_error_matrix = numpy.absolute(bias_matrix)

    if average_over_height:
        bias_matrix = numpy.mean(bias_matrix, axis=1, keepdims=True)
        absolute_error_matrix = numpy.mean(absolute_error_matrix,
                                           axis=1,
                                           keepdims=True)

    if scale_by_climo:
        normalization_file_name = (
            generator_option_dict[neural_net.NORMALIZATION_FILE_KEY])

        print(('Reading training examples (for climatology) from: "{0:s}"...'
               ).format(normalization_file_name))

        training_example_dict = example_io.read_file(normalization_file_name)
        training_example_dict = example_utils.subset_by_field(
            example_dict=training_example_dict,
            field_names=[example_utils.SHORTWAVE_HEATING_RATE_NAME])
        training_example_dict = example_utils.subset_by_height(
            example_dict=training_example_dict,
            heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY])

        dummy_example_dict = {
            example_utils.SCALAR_PREDICTOR_NAMES_KEY: [],
            example_utils.VECTOR_PREDICTOR_NAMES_KEY: [],
            example_utils.SCALAR_TARGET_NAMES_KEY: [],
            example_utils.VECTOR_TARGET_NAMES_KEY:
            [example_utils.SHORTWAVE_HEATING_RATE_NAME],
            example_utils.HEIGHTS_KEY:
            generator_option_dict[neural_net.HEIGHTS_KEY]
        }

        mean_training_example_dict = normalization.create_mean_example(
            new_example_dict=dummy_example_dict,
            training_example_dict=training_example_dict)
        climo_matrix_k_day01 = mean_training_example_dict[
            example_utils.VECTOR_TARGET_VALS_KEY][..., 0]

        bias_matrix = bias_matrix / climo_matrix_k_day01
        absolute_error_matrix = absolute_error_matrix / climo_matrix_k_day01

    print(SEPARATOR_STRING)
    high_bias_indices, low_bias_indices, low_abs_error_indices = (
        misc_utils.find_best_and_worst_predictions(
            bias_matrix=bias_matrix,
            absolute_error_matrix=absolute_error_matrix,
            num_examples_per_set=num_examples_per_set))
    print(SEPARATOR_STRING)

    high_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=high_bias_indices)
    high_bias_file_name = (
        '{0:s}/predictions_high-bias.nc'.format(output_dir_name))

    print('Writing examples with greatest positive bias to: "{0:s}"...'.format(
        high_bias_file_name))
    prediction_io.write_file(
        netcdf_file_name=high_bias_file_name,
        scalar_target_matrix=high_bias_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=high_bias_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=high_bias_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=high_bias_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=high_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=high_bias_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=high_bias_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

    low_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_bias_indices)
    low_bias_file_name = (
        '{0:s}/predictions_low-bias.nc'.format(output_dir_name))

    print('Writing examples with greatest negative bias to: "{0:s}"...'.format(
        low_bias_file_name))
    prediction_io.write_file(
        netcdf_file_name=low_bias_file_name,
        scalar_target_matrix=low_bias_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=low_bias_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=low_bias_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=low_bias_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=low_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=low_bias_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=low_bias_prediction_dict[prediction_io.MODEL_FILE_KEY])

    low_abs_error_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_abs_error_indices)
    low_abs_error_file_name = (
        '{0:s}/predictions_low-absolute-error.nc'.format(output_dir_name))

    print(
        'Writing examples with smallest absolute error to: "{0:s}"...'.format(
            low_abs_error_file_name))
    prediction_io.write_file(
        netcdf_file_name=low_abs_error_file_name,
        scalar_target_matrix=low_abs_error_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=low_abs_error_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=low_abs_error_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=low_abs_error_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=low_abs_error_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=low_abs_error_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=low_abs_error_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

    if scale_by_climo:
        return

    if average_over_height:
        mean_targets_k_day01 = numpy.mean(target_matrix_k_day01, axis=1)
        sort_indices = numpy.argsort(-1 * mean_targets_k_day01)
    else:
        max_targets_k_day01 = numpy.max(target_matrix_k_day01, axis=1)
        sort_indices = numpy.argsort(-1 * max_targets_k_day01)

    large_hr_indices = sort_indices[:num_examples_per_set]
    large_hr_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=large_hr_indices)
    large_hr_file_name = (
        '{0:s}/predictions_large-heating-rate.nc'.format(output_dir_name))

    print('Writing examples with greatest heating rate to: "{0:s}"...'.format(
        large_hr_file_name))
    prediction_io.write_file(
        netcdf_file_name=large_hr_file_name,
        scalar_target_matrix=large_hr_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=large_hr_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=large_hr_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=large_hr_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=large_hr_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=large_hr_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=large_hr_prediction_dict[prediction_io.MODEL_FILE_KEY])

    if not average_over_height:
        return

    mean_targets_k_day01 = numpy.mean(target_matrix_k_day01, axis=1)
    sort_indices = numpy.argsort(mean_targets_k_day01)
    small_hr_indices = sort_indices[:num_examples_per_set]

    small_hr_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=small_hr_indices)
    small_hr_file_name = (
        '{0:s}/predictions_small-heating-rate.nc'.format(output_dir_name))

    print('Writing examples with smallest heating rate to: "{0:s}"...'.format(
        small_hr_file_name))
    prediction_io.write_file(
        netcdf_file_name=small_hr_file_name,
        scalar_target_matrix=small_hr_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=small_hr_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=small_hr_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=small_hr_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=small_hr_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=small_hr_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=small_hr_prediction_dict[prediction_io.MODEL_FILE_KEY])

Пример #6

Показать файл

Файл: split_predictions_by_cloud.py Проект: thunderhoser/ml4rt

def _run(input_file_name, example_dir_name, for_ice, min_path_kg_m02,
         output_dir_name):
    """Splits predictions by cloud regime.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param example_dir_name: Same.
    :param for_ice: Same.
    :param min_path_kg_m02: Same.
    :param output_dir_name: Same.
    """

    print('Reading data from: "{0:s}"...\n'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)

    example_dict = misc_utils.get_raw_examples(
        example_file_name='',
        num_examples=int(1e12),
        example_dir_name=example_dir_name,
        example_id_file_name=input_file_name)
    print(SEPARATOR_STRING)

    cloud_layer_counts = example_utils.find_cloud_layers(
        example_dict=example_dict,
        min_path_kg_m02=min_path_kg_m02,
        for_ice=for_ice)[-1]

    unique_cloud_layer_counts, unique_example_counts = numpy.unique(
        cloud_layer_counts, return_counts=True)

    for i in range(len(unique_cloud_layer_counts)):
        print(
            ('Number of examples with {0:d} cloud layers '
             '({1:s}-water path >= {2:.1f} g m^-2) = {3:d}').format(
                 unique_cloud_layer_counts[i], 'ice' if for_ice else 'liquid',
                 KG_TO_GRAMS * min_path_kg_m02, unique_example_counts[i]))

    print(SEPARATOR_STRING)

    num_output_files = len(MIN_LAYERS_BY_FILE)

    for k in range(num_output_files):
        these_indices = numpy.where(
            numpy.logical_and(cloud_layer_counts >= MIN_LAYERS_BY_FILE[k],
                              cloud_layer_counts <= MAX_LAYERS_BY_FILE[k]))[0]

        this_prediction_dict = prediction_io.subset_by_index(
            prediction_dict=copy.deepcopy(prediction_dict),
            desired_indices=these_indices)

        this_output_file_name = '{0:s}/predictions_{1:s}.nc'.format(
            output_dir_name, FILE_SUFFIXES[k])
        print('Writing {0:d} examples to: "{1:s}"...'.format(
            len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
            this_output_file_name))

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])