Example #1
def _augment_eval_table(result_table_xarray):
    """Augments evaluation table.

    Specifically, adds number of examples and actual values for each target
    variable.

    :param result_table_xarray: Table returned by `evaluation.read_file`.
    :return: result_table_xarray: Same but with number of examples and actual
        values for each target variable.
    """

    prediction_file_name = (
        result_table_xarray.attrs[evaluation.PREDICTION_FILE_KEY])

    print('Reading data from: "{0:s}"...'.format(prediction_file_name))
    prediction_dict = prediction_io.read_file(prediction_file_name)

    num_examples = len(prediction_dict[prediction_io.EXAMPLE_IDS_KEY])
    result_table_xarray.attrs[NUM_EXAMPLES_KEY] = num_examples

    scalar_target_matrix = prediction_dict[prediction_io.SCALAR_TARGETS_KEY]
    these_dim = (EXAMPLE_DIM, evaluation.SCALAR_FIELD_DIM)
    result_table_xarray.update(
        {SCALAR_TARGET_KEY: (these_dim, scalar_target_matrix)})

    vector_target_matrix = prediction_dict[prediction_io.VECTOR_TARGETS_KEY]
    these_dim = (EXAMPLE_DIM, evaluation.HEIGHT_DIM,
                 evaluation.VECTOR_FIELD_DIM)
    result_table_xarray.update(
        {VECTOR_TARGET_KEY: (these_dim, vector_target_matrix)})

    try:
        _ = result_table_xarray.coords[evaluation.AUX_TARGET_FIELD_DIM].values
    except KeyError:
        return result_table_xarray

    example_dict = {
        example_utils.SCALAR_TARGET_NAMES_KEY:
        numpy.array(result_table_xarray.coords[
            evaluation.SCALAR_FIELD_DIM].values).tolist(),
        example_utils.VECTOR_TARGET_NAMES_KEY:
        numpy.array(result_table_xarray.coords[
            evaluation.VECTOR_FIELD_DIM].values).tolist(),
        example_utils.HEIGHTS_KEY:
        numpy.round(result_table_xarray.coords[
            evaluation.HEIGHT_DIM].values).astype(int)
    }

    aux_target_matrix = evaluation.get_aux_fields(
        prediction_dict=prediction_dict,
        example_dict=example_dict)[evaluation.AUX_TARGET_VALS_KEY]

    these_dim = (EXAMPLE_DIM, evaluation.AUX_TARGET_FIELD_DIM)
    result_table_xarray.update(
        {AUX_TARGET_KEY: (these_dim, aux_target_matrix)})

    return result_table_xarray
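
A note on the pattern used here: `xarray.Dataset.update` attaches a new data variable when given a mapping from variable name to a `(dims, data)` tuple. A minimal self-contained sketch of that mechanism (the dimension and variable names below are illustrative, not the project's keys):

import numpy
import xarray

table_xarray = xarray.Dataset(
    coords={'height_m_agl': numpy.array([10, 50, 100])})
dummy_target_matrix = numpy.random.rand(5, 3)  # 5 examples x 3 heights

# Attach a new variable keyed by (example, height), as the function above
# does for the scalar, vector, and auxiliary target matrices.
table_xarray.update(
    {'vector_target': (('example', 'height_m_agl'), dummy_target_matrix)})
print(table_xarray['vector_target'].dims)  # ('example', 'height_m_agl')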
Example #2
def _run(input_file_name, use_pmm, max_pmm_percentile_level, output_file_name):
    """Averages predicted and target values over many examples.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param use_pmm: Same.
    :param max_pmm_percentile_level: Same.
    :param output_file_name: Same.
    """

    print((
        'Reading predicted and target values for each example from: "{0:s}"...'
    ).format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)

    num_examples = prediction_dict[prediction_io.VECTOR_TARGETS_KEY].shape[0]

    print('Averaging {0:d} examples...'.format(num_examples))
    mean_prediction_dict = prediction_io.average_predictions(
        prediction_dict=prediction_dict,
        use_pmm=use_pmm,
        max_pmm_percentile_level=max_pmm_percentile_level)

    example_id_strings = [example_utils.get_dummy_example_id()]
    print(example_id_strings)

    print('Writing mean example to: "{0:s}"...'.format(output_file_name))
    prediction_io.write_file(
        netcdf_file_name=output_file_name,
        scalar_target_matrix=mean_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=mean_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=mean_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=mean_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=mean_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=example_id_strings,
        model_file_name=mean_prediction_dict[prediction_io.MODEL_FILE_KEY])
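
When `use_pmm` is true, `prediction_io.average_predictions` computes a probability-matched mean (PMM) rather than a plain mean. A minimal sketch of the PMM idea itself, assuming a 2-D array of examples by heights (this is not the project's implementation):

import numpy

def simple_pmm(field_matrix, max_percentile_level=99.):
    """Probability-matched mean over the example axis (axis 0).

    The mean profile keeps its rank order, but its values are replaced by
    percentiles pooled from all examples.
    """
    mean_field = numpy.mean(field_matrix, axis=0)
    percentile_levels = numpy.linspace(
        0., max_percentile_level, num=mean_field.size)
    pooled_values = numpy.percentile(
        numpy.ravel(field_matrix), percentile_levels)

    # Rank of each point in the mean profile, then map ranks to pooled values.
    ranks = numpy.argsort(numpy.argsort(mean_field))
    return pooled_values[ranks]

field_matrix = numpy.random.rand(100, 73)  # 100 examples x 73 heights
print(simple_pmm(field_matrix).shape)  # (73,)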
Example #3
def _run(evaluation_file_names, line_styles, line_colour_strings,
         set_descriptions_verbose, confidence_level, use_log_scale,
         plot_by_height, output_dir_name):
    """Plots model evaluation.

    This is effectively the main method.

    :param evaluation_file_names: See documentation at top of file.
    :param line_styles: Same.
    :param line_colour_strings: Same.
    :param set_descriptions_verbose: Same.
    :param confidence_level: Same.
    :param use_log_scale: Same.
    :param plot_by_height: Same.
    :param output_dir_name: Same.
    """

    # Check input args.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    if confidence_level < 0:
        confidence_level = None

    if confidence_level is not None:
        error_checking.assert_is_geq(confidence_level, 0.9)
        error_checking.assert_is_less_than(confidence_level, 1.)

    num_evaluation_sets = len(evaluation_file_names)
    expected_dim = numpy.array([num_evaluation_sets], dtype=int)

    error_checking.assert_is_string_list(line_styles)
    error_checking.assert_is_numpy_array(numpy.array(line_styles),
                                         exact_dimensions=expected_dim)

    error_checking.assert_is_string_list(set_descriptions_verbose)
    error_checking.assert_is_numpy_array(numpy.array(set_descriptions_verbose),
                                         exact_dimensions=expected_dim)

    set_descriptions_verbose = [
        s.replace('_', ' ') for s in set_descriptions_verbose
    ]
    set_descriptions_abbrev = [
        s.lower().replace(' ', '-') for s in set_descriptions_verbose
    ]

    error_checking.assert_is_string_list(line_colour_strings)
    error_checking.assert_is_numpy_array(numpy.array(line_colour_strings),
                                         exact_dimensions=expected_dim)
    line_colours = [
        numpy.fromstring(s, dtype=float, sep='_') / 255
        for s in line_colour_strings
    ]

    for i in range(num_evaluation_sets):
        error_checking.assert_is_numpy_array(line_colours[i],
                                             exact_dimensions=numpy.array(
                                                 [3], dtype=int))
        error_checking.assert_is_geq_numpy_array(line_colours[i], 0.)
        error_checking.assert_is_leq_numpy_array(line_colours[i], 1.)

    # Read files.
    evaluation_tables_xarray = [xarray.Dataset()] * num_evaluation_sets
    prediction_dicts = [dict()] * num_evaluation_sets

    for i in range(num_evaluation_sets):
        print('Reading data from: "{0:s}"...'.format(evaluation_file_names[i]))
        evaluation_tables_xarray[i] = evaluation.read_file(
            evaluation_file_names[i])

        this_prediction_file_name = (
            evaluation_tables_xarray[i].attrs[evaluation.PREDICTION_FILE_KEY])

        print(
            'Reading data from: "{0:s}"...'.format(this_prediction_file_name))
        prediction_dicts[i] = prediction_io.read_file(
            this_prediction_file_name)

    model_file_name = (
        evaluation_tables_xarray[0].attrs[evaluation.MODEL_FILE_KEY])
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True)

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]

    scalar_target_names = (
        generator_option_dict[neural_net.SCALAR_TARGET_NAMES_KEY])
    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY])
    heights_m_agl = generator_option_dict[neural_net.HEIGHTS_KEY]

    try:
        aux_target_names = evaluation_tables_xarray[0].coords[
            evaluation.AUX_TARGET_FIELD_DIM].values
    except KeyError:
        aux_target_names = []

    num_scalar_targets = len(scalar_target_names)
    num_vector_targets = len(vector_target_names)
    num_heights = len(heights_m_agl)
    num_aux_targets = len(aux_target_names)

    example_dict = {
        example_utils.SCALAR_TARGET_NAMES_KEY:
        scalar_target_names,
        example_utils.VECTOR_TARGET_NAMES_KEY:
        vector_target_names,
        example_utils.HEIGHTS_KEY:
        heights_m_agl,
        example_utils.SCALAR_PREDICTOR_NAMES_KEY:
        generator_option_dict[neural_net.SCALAR_PREDICTOR_NAMES_KEY],
        example_utils.VECTOR_PREDICTOR_NAMES_KEY:
        generator_option_dict[neural_net.VECTOR_PREDICTOR_NAMES_KEY]
    }

    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY])
    print(('Reading training examples (for climatology) from: "{0:s}"...'
           ).format(normalization_file_name))

    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict, heights_m_agl=heights_m_agl)
    mean_training_example_dict = normalization.create_mean_example(
        new_example_dict=example_dict,
        training_example_dict=training_example_dict)

    print(SEPARATOR_STRING)

    # Do actual stuff.
    _plot_error_distributions(
        prediction_dicts=prediction_dicts,
        model_metadata_dict=model_metadata_dict,
        aux_target_names=aux_target_names,
        set_descriptions_abbrev=set_descriptions_abbrev,
        set_descriptions_verbose=set_descriptions_verbose,
        output_dir_name=output_dir_name)
    print(SEPARATOR_STRING)

    _plot_reliability_by_height(
        evaluation_tables_xarray=evaluation_tables_xarray,
        vector_target_names=vector_target_names,
        heights_m_agl=heights_m_agl,
        set_descriptions_abbrev=set_descriptions_abbrev,
        set_descriptions_verbose=set_descriptions_verbose,
        output_dir_name=output_dir_name)
    print(SEPARATOR_STRING)

    for k in range(num_vector_targets):
        for this_score_name in list(SCORE_NAME_TO_PROFILE_KEY.keys()):
            _plot_score_profile(
                evaluation_tables_xarray=evaluation_tables_xarray,
                line_styles=line_styles,
                line_colours=line_colours,
                set_descriptions_verbose=set_descriptions_verbose,
                confidence_level=confidence_level,
                target_name=vector_target_names[k],
                score_name=this_score_name,
                use_log_scale=use_log_scale,
                output_dir_name=output_dir_name)

    print(SEPARATOR_STRING)

    for k in range(num_scalar_targets):
        _plot_attributes_diagram(
            evaluation_tables_xarray=evaluation_tables_xarray,
            line_styles=line_styles,
            line_colours=line_colours,
            set_descriptions_abbrev=set_descriptions_abbrev,
            set_descriptions_verbose=set_descriptions_verbose,
            confidence_level=confidence_level,
            mean_training_example_dict=mean_training_example_dict,
            target_name=scalar_target_names[k],
            output_dir_name=output_dir_name)

    for k in range(num_aux_targets):
        _plot_attributes_diagram(
            evaluation_tables_xarray=evaluation_tables_xarray,
            line_styles=line_styles,
            line_colours=line_colours,
            set_descriptions_abbrev=set_descriptions_abbrev,
            set_descriptions_verbose=set_descriptions_verbose,
            confidence_level=confidence_level,
            mean_training_example_dict=mean_training_example_dict,
            target_name=aux_target_names[k],
            output_dir_name=output_dir_name)

    if not plot_by_height:
        return

    print(SEPARATOR_STRING)

    for k in range(num_vector_targets):
        for j in range(num_heights):
            _plot_attributes_diagram(
                evaluation_tables_xarray=evaluation_tables_xarray,
                line_styles=line_styles,
                line_colours=line_colours,
                set_descriptions_abbrev=set_descriptions_abbrev,
                set_descriptions_verbose=set_descriptions_verbose,
                confidence_level=confidence_level,
                mean_training_example_dict=mean_training_example_dict,
                height_m_agl=heights_m_agl[j],
                target_name=vector_target_names[k],
                output_dir_name=output_dir_name)

        if k != num_vector_targets - 1:
            print(SEPARATOR_STRING)
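
One detail worth isolating from the argument-checking above: line colours arrive as underscore-separated RGB strings in the 0...255 range and are normalized to 0...1. A self-contained sketch of that parsing step:

import numpy

line_colour_strings = ['255_0_0', '27_158_119']  # RGB in 0...255
line_colours = [
    numpy.fromstring(s, dtype=float, sep='_') / 255
    for s in line_colour_strings
]
print(line_colours[1])  # [0.10588235 0.61960784 0.46666667]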
Example #4

def _run(input_file_name, top_output_dir_name):
    """Splits predictions by site (point location).

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param top_output_dir_name: Same.
    :raises: ValueError: if any example cannot be assigned to a site.
    """

    # Read data.
    print('Reading data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)
    example_metadata_dict = example_utils.parse_example_ids(
        prediction_dict[prediction_io.EXAMPLE_IDS_KEY])

    example_latitudes_deg_n = number_rounding.round_to_nearest(
        example_metadata_dict[example_utils.LATITUDES_KEY],
        LATLNG_TOLERANCE_DEG)
    example_longitudes_deg_e = number_rounding.round_to_nearest(
        example_metadata_dict[example_utils.LONGITUDES_KEY],
        LATLNG_TOLERANCE_DEG)
    example_longitudes_deg_e = lng_conversion.convert_lng_positive_in_west(
        example_longitudes_deg_e)

    num_examples = len(example_latitudes_deg_n)
    example_written_flags = numpy.full(num_examples, False, dtype=bool)

    site_names = list(SITE_NAME_TO_LATLNG.keys())
    num_sites = len(site_names)

    for j in range(num_sites):
        this_site_latitude_deg_n = SITE_NAME_TO_LATLNG[site_names[j]][0]
        this_site_longitude_deg_e = SITE_NAME_TO_LATLNG[site_names[j]][1]

        these_indices = numpy.where(
            numpy.logical_and(
                numpy.absolute(example_latitudes_deg_n -
                               this_site_latitude_deg_n) <=
                LATLNG_TOLERANCE_DEG,
                numpy.absolute(example_longitudes_deg_e -
                               this_site_longitude_deg_e) <=
                LATLNG_TOLERANCE_DEG))[0]

        this_prediction_dict = prediction_io.subset_by_index(
            prediction_dict=copy.deepcopy(prediction_dict),
            desired_indices=these_indices)

        this_output_file_name = '{0:s}/{1:s}/predictions.nc'.format(
            top_output_dir_name, site_names[j])
        print('Writing {0:d} examples to: "{1:s}"...'.format(
            len(these_indices), this_output_file_name))

        if len(these_indices) == 0:
            continue

        example_written_flags[these_indices] = True

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])

    if numpy.all(example_written_flags):
        return

    # bad_latitudes_deg_n = (
    #     example_latitudes_deg_n[example_written_flags == False]
    # )
    # bad_longitudes_deg_e = (
    #     example_longitudes_deg_e[example_written_flags == False]
    # )
    # bad_coord_matrix = numpy.transpose(numpy.vstack((
    #     bad_latitudes_deg_n, bad_longitudes_deg_e
    # )))
    # bad_coord_matrix = numpy.unique(bad_coord_matrix, axis=0)
    # print(bad_coord_matrix)

    error_string = (
        '{0:d} of {1:d} examples could not be assigned to a site.  This is a '
        'BIG PROBLEM.'
    ).format(numpy.sum(numpy.invert(example_written_flags)), num_examples)

    raise ValueError(error_string)
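
The site matching above keeps every example whose rounded coordinates fall within LATLNG_TOLERANCE_DEG of a site in both latitude and longitude. A self-contained sketch of that tolerance test (the tolerance value below is illustrative):

import numpy

LATLNG_TOLERANCE_DEG = 1e-4  # hypothetical value

example_latitudes_deg_n = numpy.array([71.32, 71.32005, 38.21])
example_longitudes_deg_e = numpy.array([203.39, 203.39, 284.33])
site_latitude_deg_n, site_longitude_deg_e = 71.32, 203.39

these_indices = numpy.where(numpy.logical_and(
    numpy.absolute(example_latitudes_deg_n - site_latitude_deg_n)
    <= LATLNG_TOLERANCE_DEG,
    numpy.absolute(example_longitudes_deg_e - site_longitude_deg_e)
    <= LATLNG_TOLERANCE_DEG
))[0]
print(these_indices)  # [0 1]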
Example #5
def _run(input_prediction_file_name, model_file_name,
         output_prediction_file_name):
    """Applies one set of isotonic-regression models to data.

    This is effectively the main method.

    :param input_prediction_file_name: See documentation at top of file.
    :param model_file_name: Same.
    :param output_prediction_file_name: Same.
    :raises: ValueError: if predictions in `input_prediction_file_name` were
        made with isotonic regression.
    """

    print('Reading original predictions from: "{0:s}"...'.format(
        input_prediction_file_name
    ))
    prediction_dict = prediction_io.read_file(input_prediction_file_name)

    if prediction_dict[prediction_io.ISOTONIC_MODEL_FILE_KEY] is not None:
        raise ValueError(
            'Input predictions must be made with base model only (i.e., must '
            'not already include isotonic regression).'
        )

    orig_vector_prediction_matrix = (
        None if prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY].size == 0
        else prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY]
    )
    orig_scalar_prediction_matrix = (
        None if prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY].size == 0
        else prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY]
    )

    print('Reading isotonic-regression models from: "{0:s}"...'.format(
        model_file_name
    ))
    scalar_model_objects, vector_model_object_matrix = (
        isotonic_regression.read_file(model_file_name)
    )

    print(SEPARATOR_STRING)
    new_vector_prediction_matrix, new_scalar_prediction_matrix = (
        isotonic_regression.apply_models(
            orig_vector_prediction_matrix=orig_vector_prediction_matrix,
            orig_scalar_prediction_matrix=orig_scalar_prediction_matrix,
            scalar_model_objects=scalar_model_objects,
            vector_model_object_matrix=vector_model_object_matrix
        )
    )
    print(SEPARATOR_STRING)

    print('Writing new predictions to: "{0:s}"...'.format(
        output_prediction_file_name
    ))
    prediction_io.write_file(
        netcdf_file_name=output_prediction_file_name,
        scalar_target_matrix=prediction_dict[prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=prediction_dict[prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=new_scalar_prediction_matrix,
        vector_prediction_matrix=new_vector_prediction_matrix,
        heights_m_agl=prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=prediction_dict[prediction_io.MODEL_FILE_KEY],
        isotonic_model_file_name=model_file_name
    )
Example #6
def _get_target_values(prediction_file_name, model_metadata_dict,
                       example_id_strings, target_field_name,
                       target_height_m_agl):
    """Returns predicted and actual target values.

    E = number of examples

    :param prediction_file_name: See documentation at top of file.
    :param model_metadata_dict: Dictionary returned by
        `neural_net.read_metafile`.
    :param example_id_strings: length-E list of example IDs.  Will return target
        values only for these examples.
    :param target_field_name: Name of target variable.
    :param target_height_m_agl: Height of target variable (metres above ground
        level).
    :return: predicted_values: length-E numpy array of predicted target values.
    :return: actual_values: length-E numpy array of actual target values.
    """

    print(('Reading predicted and actual target values from: "{0:s}"...'
           ).format(prediction_file_name))
    prediction_dict = prediction_io.read_file(prediction_file_name)

    example_indices = numpy.array([
        prediction_dict[prediction_io.EXAMPLE_IDS_KEY].index(this_id)
        for this_id in example_id_strings
    ], dtype=int)

    generator_option_dict = (
        model_metadata_dict[neural_net.TRAINING_OPTIONS_KEY])

    if target_height_m_agl is None:
        scalar_target_names = (
            generator_option_dict[neural_net.SCALAR_TARGET_NAMES_KEY])
        channel_index = scalar_target_names.index(target_field_name)

        actual_values = (
            prediction_dict[prediction_io.SCALAR_TARGETS_KEY][example_indices,
                                                              channel_index])
        predicted_values = (prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY][example_indices,
                                                  channel_index])

        return predicted_values, actual_values

    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY])
    channel_index = vector_target_names.index(target_field_name)

    height_index = example_utils.match_heights(
        heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY],
        desired_height_m_agl=target_height_m_agl)

    actual_values = (
        prediction_dict[prediction_io.VECTOR_TARGETS_KEY][example_indices,
                                                          height_index,
                                                          channel_index])
    predicted_values = (
        prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY][example_indices,
                                                              height_index,
                                                              channel_index])

    return predicted_values, actual_values
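
The list comprehension with `.index` above is O(E * N) over all example IDs. For large files, building a dictionary once gives the same indices in O(E + N); a self-contained sketch (names illustrative):

import numpy

all_id_strings = ['example{0:d}'.format(i) for i in range(100000)]
desired_id_strings = ['example5', 'example99999', 'example0']

id_to_index = {s: k for k, s in enumerate(all_id_strings)}
example_indices = numpy.array(
    [id_to_index[s] for s in desired_id_strings], dtype=int)
print(example_indices)  # [    5 99999     0]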
Example #7
def _run(input_file_name, min_latitude_deg, max_latitude_deg, min_longitude_deg,
         max_longitude_deg, latitude_spacing_deg, longitude_spacing_deg,
         output_dir_name):
    """Splits predictions by spatial region.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param min_latitude_deg: Same.
    :param max_latitude_deg: Same.
    :param min_longitude_deg: Same.
    :param max_longitude_deg: Same.
    :param latitude_spacing_deg: Same.
    :param longitude_spacing_deg: Same.
    :param output_dir_name: Same.
    """

    # Read data.
    print('Reading data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)
    example_metadata_dict = example_utils.parse_example_ids(
        prediction_dict[prediction_io.EXAMPLE_IDS_KEY]
    )

    example_latitudes_deg = example_metadata_dict[example_utils.LATITUDES_KEY]
    example_longitudes_deg = example_metadata_dict[example_utils.LONGITUDES_KEY]

    these_limits_deg = numpy.array([
        min_latitude_deg, max_latitude_deg, min_longitude_deg, max_longitude_deg
    ])
    if numpy.any(numpy.isnan(these_limits_deg)):
        min_latitude_deg = numpy.min(example_latitudes_deg)
        max_latitude_deg = numpy.max(example_latitudes_deg)
        min_longitude_deg = numpy.min(example_longitudes_deg)
        max_longitude_deg = numpy.max(example_longitudes_deg)

    # Create grid.
    grid_point_latitudes_deg, grid_point_longitudes_deg = (
        misc.create_latlng_grid(
            min_latitude_deg=min_latitude_deg,
            max_latitude_deg=max_latitude_deg,
            latitude_spacing_deg=latitude_spacing_deg,
            min_longitude_deg=min_longitude_deg,
            max_longitude_deg=max_longitude_deg,
            longitude_spacing_deg=longitude_spacing_deg
        )
    )

    num_grid_rows = len(grid_point_latitudes_deg)
    num_grid_columns = len(grid_point_longitudes_deg)

    grid_edge_latitudes_deg, grid_edge_longitudes_deg = (
        grids.get_latlng_grid_cell_edges(
            min_latitude_deg=grid_point_latitudes_deg[0],
            min_longitude_deg=grid_point_longitudes_deg[0],
            lat_spacing_deg=numpy.diff(grid_point_latitudes_deg[:2])[0],
            lng_spacing_deg=numpy.diff(grid_point_longitudes_deg[:2])[0],
            num_rows=num_grid_rows, num_columns=num_grid_columns
        )
    )

    print(SEPARATOR_STRING)

    for i in range(num_grid_rows):
        for j in range(num_grid_columns):
            these_indices = grids.find_events_in_grid_cell(
                event_x_coords_metres=example_longitudes_deg,
                event_y_coords_metres=example_latitudes_deg,
                grid_edge_x_coords_metres=grid_edge_longitudes_deg,
                grid_edge_y_coords_metres=grid_edge_latitudes_deg,
                row_index=i, column_index=j, verbose=False
            )

            this_prediction_dict = prediction_io.subset_by_index(
                prediction_dict=copy.deepcopy(prediction_dict),
                desired_indices=these_indices
            )
            this_num_examples = len(
                this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]
            )

            if this_num_examples == 0:
                continue

            this_output_file_name = prediction_io.find_file(
                directory_name=output_dir_name, grid_row=i, grid_column=j,
                raise_error_if_missing=False
            )
            print('Writing {0:d} examples to: "{1:s}"...'.format(
                len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
                this_output_file_name
            ))

            prediction_io.write_file(
                netcdf_file_name=this_output_file_name,
                scalar_target_matrix=
                this_prediction_dict[prediction_io.SCALAR_TARGETS_KEY],
                vector_target_matrix=
                this_prediction_dict[prediction_io.VECTOR_TARGETS_KEY],
                scalar_prediction_matrix=
                this_prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY],
                vector_prediction_matrix=
                this_prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY],
                heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
                example_id_strings=
                this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
                model_file_name=
                this_prediction_dict[prediction_io.MODEL_FILE_KEY]
            )

    print(SEPARATOR_STRING)

    grid_metafile_name = prediction_io.find_grid_metafile(
        prediction_dir_name=output_dir_name, raise_error_if_missing=False
    )

    print('Writing grid metadata to: "{0:s}"...'.format(grid_metafile_name))
    prediction_io.write_grid_metafile(
        grid_point_latitudes_deg=grid_point_latitudes_deg,
        grid_point_longitudes_deg=grid_point_longitudes_deg,
        netcdf_file_name=grid_metafile_name
    )
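
`grids.find_events_in_grid_cell` is project-specific; the same cell assignment can be sketched with plain NumPy using `numpy.digitize` on the edge coordinates (an illustration, not the library's implementation):

import numpy

grid_edge_latitudes_deg = numpy.array([30., 35., 40., 45.])  # 3 rows
grid_edge_longitudes_deg = numpy.array([250., 260., 270.])   # 2 columns

event_latitudes_deg = numpy.array([31.2, 44.9, 36.5])
event_longitudes_deg = numpy.array([255., 265., 269.9])

# digitize returns 1-based bin indices; subtract 1 for 0-based rows/columns.
row_indices = numpy.digitize(event_latitudes_deg, grid_edge_latitudes_deg) - 1
column_indices = (
    numpy.digitize(event_longitudes_deg, grid_edge_longitudes_deg) - 1)
print(row_indices, column_indices)  # [0 2 1] [0 1 1]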
Example #8
def _run(prediction_file_name, num_examples, example_dir_name, use_log_scale,
         output_dir_name):
    """Plots comparisons between predicted and actual (target) profiles.

    This is effectively the main method.

    :param prediction_file_name: See documentation at top of file.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param use_log_scale: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    if num_examples < 1:
        num_examples = None
    if example_dir_name == '':
        example_dir_name = None

    print(('Reading predicted and actual (target) profiles from: "{0:s}"...'
           ).format(prediction_file_name))

    prediction_dict = prediction_io.read_file(prediction_file_name)
    num_examples_orig = len(prediction_dict[prediction_io.EXAMPLE_IDS_KEY])

    if num_examples is not None and num_examples < num_examples_orig:
        desired_indices = numpy.linspace(0,
                                         num_examples - 1,
                                         num=num_examples,
                                         dtype=int)
        prediction_dict = prediction_io.subset_by_index(
            prediction_dict=prediction_dict, desired_indices=desired_indices)

    vector_target_matrix = prediction_dict[prediction_io.VECTOR_TARGETS_KEY]
    vector_prediction_matrix = (
        prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY])
    scalar_target_matrix = prediction_dict[prediction_io.SCALAR_TARGETS_KEY]
    scalar_prediction_matrix = (
        prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY])

    model_file_name = prediction_dict[prediction_io.MODEL_FILE_KEY]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True)

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    model_metadata_dict[neural_net.TRAINING_OPTIONS_KEY][
        neural_net.HEIGHTS_KEY] = prediction_dict[prediction_io.HEIGHTS_KEY]

    # If necessary, convert flux increments to fluxes.
    vector_target_matrix, vector_prediction_matrix, model_metadata_dict = (
        _fluxes_increments_to_actual(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            model_metadata_dict=model_metadata_dict))

    # If necessary, convert fluxes to heating rates.
    vector_target_matrix, vector_prediction_matrix, model_metadata_dict = (
        _fluxes_to_heating_rate(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            model_metadata_dict=model_metadata_dict,
            prediction_file_name=prediction_file_name,
            example_dir_name=example_dir_name))

    # If data include both upwelling and downwelling fluxes, remove flux
    # increments (they need not be plotted).
    vector_target_matrix, vector_prediction_matrix, model_metadata_dict = (
        _remove_flux_increments(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            model_metadata_dict=model_metadata_dict))

    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]
    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY])
    plot_fancy = all(
        [t in vector_target_names for t in DEFAULT_VECTOR_TARGET_NAMES])

    if plot_fancy:
        _plot_comparisons_fancy(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            example_id_strings=prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
            model_metadata_dict=model_metadata_dict,
            use_log_scale=use_log_scale,
            output_dir_name=output_dir_name)
    else:
        title_strings = _get_flux_strings(
            scalar_target_matrix=scalar_target_matrix,
            scalar_prediction_matrix=scalar_prediction_matrix,
            model_metadata_dict=model_metadata_dict)

        _plot_comparisons_simple(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            example_id_strings=prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
            model_metadata_dict=model_metadata_dict,
            use_log_scale=use_log_scale,
            title_strings=title_strings,
            output_dir_name=output_dir_name)
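
Note that the subsetting step above, `numpy.linspace(0, num_examples - 1, num=num_examples, dtype=int)`, simply yields the first `num_examples` indices. If the intent were to sample evenly across the whole file, the endpoint would be `num_examples_orig - 1`; a sketch of the difference (the second variant is an assumption about intent, not a confirmed fix):

import numpy

num_examples_orig = 1000
num_examples = 5

first_indices = numpy.linspace(
    0, num_examples - 1, num=num_examples, dtype=int)
print(first_indices)  # [0 1 2 3 4]

spread_indices = numpy.linspace(
    0, num_examples_orig - 1, num=num_examples, dtype=int)
print(spread_indices)  # [  0 249 499 749 999]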
Example #9
def _run(input_prediction_file_name, num_examples_per_set, output_dir_name):
    """Finds best and worst heating-rate predictions.

    This is effectively the main method.

    :param input_prediction_file_name: See documentation at top of file.
    :param num_examples_per_set: Same.
    :param output_dir_name: Same.
    """

    error_checking.assert_is_greater(num_examples_per_set, 0)

    print('Reading data from: "{0:s}"...'.format(input_prediction_file_name))
    prediction_dict = prediction_io.read_file(input_prediction_file_name)

    model_file_name = prediction_dict[prediction_io.MODEL_FILE_KEY]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0])

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]

    scalar_target_names = (
        generator_option_dict[neural_net.SCALAR_TARGET_NAMES_KEY])
    down_index = scalar_target_names.index(
        example_utils.SHORTWAVE_SURFACE_DOWN_FLUX_NAME)
    up_index = scalar_target_names.index(
        example_utils.SHORTWAVE_TOA_UP_FLUX_NAME)

    targets_w_m02 = (
        prediction_dict[prediction_io.SCALAR_TARGETS_KEY][..., down_index] -
        prediction_dict[prediction_io.SCALAR_TARGETS_KEY][..., up_index])
    predictions_w_m02 = (
        prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY][..., down_index]
        - prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY][..., up_index])

    biases_w_m02 = predictions_w_m02 - targets_w_m02
    bias_matrix = numpy.expand_dims(biases_w_m02, axis=1)

    print(SEPARATOR_STRING)
    high_bias_indices, low_bias_indices, low_abs_error_indices = (
        misc_utils.find_best_and_worst_predictions(
            bias_matrix=bias_matrix,
            absolute_error_matrix=numpy.absolute(bias_matrix),
            num_examples_per_set=num_examples_per_set))
    print(SEPARATOR_STRING)

    high_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=high_bias_indices)
    high_bias_file_name = (
        '{0:s}/predictions_high-bias.nc'.format(output_dir_name))

    print('Writing examples with greatest positive bias to: "{0:s}"...'.format(
        high_bias_file_name))
    prediction_io.write_file(
        netcdf_file_name=high_bias_file_name,
        scalar_target_matrix=high_bias_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=high_bias_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=high_bias_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=high_bias_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=high_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=high_bias_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=high_bias_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

    low_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_bias_indices)
    low_bias_file_name = (
        '{0:s}/predictions_low-bias.nc'.format(output_dir_name))

    print('Writing examples with greatest negative bias to: "{0:s}"...'.format(
        low_bias_file_name))
    prediction_io.write_file(
        netcdf_file_name=low_bias_file_name,
        scalar_target_matrix=low_bias_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=low_bias_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=low_bias_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=low_bias_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=low_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=low_bias_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=low_bias_prediction_dict[prediction_io.MODEL_FILE_KEY])

    low_abs_error_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_abs_error_indices)
    low_abs_error_file_name = (
        '{0:s}/predictions_low-absolute-error.nc'.format(output_dir_name))

    print(
        'Writing examples with smallest absolute error to: "{0:s}"...'.format(
            low_abs_error_file_name))
    prediction_io.write_file(
        netcdf_file_name=low_abs_error_file_name,
        scalar_target_matrix=low_abs_error_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=low_abs_error_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=low_abs_error_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=low_abs_error_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=low_abs_error_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=low_abs_error_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=low_abs_error_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

    sort_indices = numpy.argsort(-1 * targets_w_m02)
    large_net_flux_indices = sort_indices[:num_examples_per_set]

    large_net_flux_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=large_net_flux_indices)
    large_net_flux_file_name = (
        '{0:s}/predictions_large-net-flux.nc'.format(output_dir_name))

    print('Writing examples with greatest net flux to: "{0:s}"...'.format(
        large_net_flux_file_name))
    prediction_io.write_file(
        netcdf_file_name=large_net_flux_file_name,
        scalar_target_matrix=large_net_flux_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=large_net_flux_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=large_net_flux_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=large_net_flux_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=large_net_flux_prediction_dict[
            prediction_io.HEIGHTS_KEY],
        example_id_strings=large_net_flux_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=large_net_flux_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

    sort_indices = numpy.argsort(targets_w_m02)
    small_net_flux_indices = sort_indices[:num_examples_per_set]

    small_net_flux_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=small_net_flux_indices)
    small_net_flux_file_name = (
        '{0:s}/predictions_small-net-flux.nc'.format(output_dir_name))

    print('Writing examples with smallest net flux to: "{0:s}"...'.format(
        small_net_flux_file_name))
    prediction_io.write_file(
        netcdf_file_name=small_net_flux_file_name,
        scalar_target_matrix=small_net_flux_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=small_net_flux_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=small_net_flux_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=small_net_flux_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=small_net_flux_prediction_dict[
            prediction_io.HEIGHTS_KEY],
        example_id_strings=small_net_flux_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=small_net_flux_prediction_dict[
            prediction_io.MODEL_FILE_KEY])
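
The net flux here is surface downwelling flux minus TOA upwelling flux, and the largest-net-flux examples are selected by sorting in descending order. A self-contained sketch of that top-k pattern:

import numpy

targets_w_m02 = numpy.array([120., 840., 455., 910., 10.])
num_examples_per_set = 2

# argsort of the negated array gives descending order.
sort_indices = numpy.argsort(-1 * targets_w_m02)
large_net_flux_indices = sort_indices[:num_examples_per_set]
print(large_net_flux_indices)  # [3 1]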
Example #10

def _run(input_prediction_file_name, average_over_height, scale_by_climo,
         num_examples_per_set, output_dir_name):
    """Finds best and worst heating-rate predictions.

    This is effectively the main method.

    :param input_prediction_file_name: See documentation at top of file.
    :param average_over_height: Same.
    :param scale_by_climo: Same.
    :param num_examples_per_set: Same.
    :param output_dir_name: Same.
    """

    # TODO(thunderhoser): Maybe allow specific height again (e.g., 15 km).

    error_checking.assert_is_greater(num_examples_per_set, 0)
    scale_by_climo = scale_by_climo and not average_over_height

    print('Reading data from: "{0:s}"...'.format(input_prediction_file_name))
    prediction_dict = prediction_io.read_file(input_prediction_file_name)

    model_file_name = prediction_dict[prediction_io.MODEL_FILE_KEY]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0])

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]

    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY])
    hr_index = (vector_target_names.index(
        example_utils.SHORTWAVE_HEATING_RATE_NAME))

    target_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_TARGETS_KEY][..., hr_index])
    prediction_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY][..., hr_index])

    bias_matrix = prediction_matrix_k_day01 - target_matrix_k_day01
    absolute_error_matrix = numpy.absolute(bias_matrix)

    if average_over_height:
        bias_matrix = numpy.mean(bias_matrix, axis=1, keepdims=True)
        absolute_error_matrix = numpy.mean(absolute_error_matrix,
                                           axis=1,
                                           keepdims=True)

    if scale_by_climo:
        normalization_file_name = (
            generator_option_dict[neural_net.NORMALIZATION_FILE_KEY])

        print(('Reading training examples (for climatology) from: "{0:s}"...'
               ).format(normalization_file_name))

        training_example_dict = example_io.read_file(normalization_file_name)
        training_example_dict = example_utils.subset_by_field(
            example_dict=training_example_dict,
            field_names=[example_utils.SHORTWAVE_HEATING_RATE_NAME])
        training_example_dict = example_utils.subset_by_height(
            example_dict=training_example_dict,
            heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY])

        dummy_example_dict = {
            example_utils.SCALAR_PREDICTOR_NAMES_KEY: [],
            example_utils.VECTOR_PREDICTOR_NAMES_KEY: [],
            example_utils.SCALAR_TARGET_NAMES_KEY: [],
            example_utils.VECTOR_TARGET_NAMES_KEY:
            [example_utils.SHORTWAVE_HEATING_RATE_NAME],
            example_utils.HEIGHTS_KEY:
            generator_option_dict[neural_net.HEIGHTS_KEY]
        }

        mean_training_example_dict = normalization.create_mean_example(
            new_example_dict=dummy_example_dict,
            training_example_dict=training_example_dict)
        climo_matrix_k_day01 = mean_training_example_dict[
            example_utils.VECTOR_TARGET_VALS_KEY][..., 0]

        bias_matrix = bias_matrix / climo_matrix_k_day01
        absolute_error_matrix = absolute_error_matrix / climo_matrix_k_day01

    print(SEPARATOR_STRING)
    high_bias_indices, low_bias_indices, low_abs_error_indices = (
        misc_utils.find_best_and_worst_predictions(
            bias_matrix=bias_matrix,
            absolute_error_matrix=absolute_error_matrix,
            num_examples_per_set=num_examples_per_set))
    print(SEPARATOR_STRING)

    high_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=high_bias_indices)
    high_bias_file_name = (
        '{0:s}/predictions_high-bias.nc'.format(output_dir_name))

    print('Writing examples with greatest positive bias to: "{0:s}"...'.format(
        high_bias_file_name))
    prediction_io.write_file(
        netcdf_file_name=high_bias_file_name,
        scalar_target_matrix=high_bias_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=high_bias_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=high_bias_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=high_bias_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=high_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=high_bias_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=high_bias_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

    low_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_bias_indices)
    low_bias_file_name = (
        '{0:s}/predictions_low-bias.nc'.format(output_dir_name))

    print('Writing examples with greatest negative bias to: "{0:s}"...'.format(
        low_bias_file_name))
    prediction_io.write_file(
        netcdf_file_name=low_bias_file_name,
        scalar_target_matrix=low_bias_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=low_bias_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=low_bias_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=low_bias_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=low_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=low_bias_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=low_bias_prediction_dict[prediction_io.MODEL_FILE_KEY])

    low_abs_error_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_abs_error_indices)
    low_abs_error_file_name = (
        '{0:s}/predictions_low-absolute-error.nc'.format(output_dir_name))

    print(
        'Writing examples with smallest absolute error to: "{0:s}"...'.format(
            low_abs_error_file_name))
    prediction_io.write_file(
        netcdf_file_name=low_abs_error_file_name,
        scalar_target_matrix=low_abs_error_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=low_abs_error_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=low_abs_error_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=low_abs_error_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=low_abs_error_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=low_abs_error_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=low_abs_error_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

    if scale_by_climo:
        return

    if average_over_height:
        mean_targets_k_day01 = numpy.mean(target_matrix_k_day01, axis=1)
        sort_indices = numpy.argsort(-1 * mean_targets_k_day01)
    else:
        max_targets_k_day01 = numpy.max(target_matrix_k_day01, axis=1)
        sort_indices = numpy.argsort(-1 * max_targets_k_day01)

    large_hr_indices = sort_indices[:num_examples_per_set]
    large_hr_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=large_hr_indices)
    large_hr_file_name = (
        '{0:s}/predictions_large-heating-rate.nc'.format(output_dir_name))

    print('Writing examples with greatest heating rate to: "{0:s}"...'.format(
        large_hr_file_name))
    prediction_io.write_file(
        netcdf_file_name=large_hr_file_name,
        scalar_target_matrix=large_hr_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=large_hr_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=large_hr_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=large_hr_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=large_hr_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=large_hr_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=large_hr_prediction_dict[prediction_io.MODEL_FILE_KEY])

    if not average_over_height:
        return

    mean_targets_k_day01 = numpy.mean(target_matrix_k_day01, axis=1)
    sort_indices = numpy.argsort(mean_targets_k_day01)
    small_hr_indices = sort_indices[:num_examples_per_set]

    small_hr_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=small_hr_indices)
    small_hr_file_name = (
        '{0:s}/predictions_small-heating-rate.nc'.format(output_dir_name))

    print('Writing examples with smallest heating rate to: "{0:s}"...'.format(
        small_hr_file_name))
    prediction_io.write_file(
        netcdf_file_name=small_hr_file_name,
        scalar_target_matrix=small_hr_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=small_hr_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=small_hr_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=small_hr_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=small_hr_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=small_hr_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=small_hr_prediction_dict[prediction_io.MODEL_FILE_KEY])
Example #11
def _run(input_file_name, num_zenith_angle_bins, num_albedo_bins,
         output_dir_name):
    """Splits predictions by time of day and time of year.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param num_zenith_angle_bins: Same.
    :param num_albedo_bins: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    error_checking.assert_is_geq(num_zenith_angle_bins, 3)
    error_checking.assert_is_geq(num_albedo_bins, 3)

    edge_zenith_angles_rad = numpy.linspace(MIN_ZENITH_ANGLE_RAD,
                                            MAX_ZENITH_ANGLE_RAD,
                                            num=num_zenith_angle_bins + 1,
                                            dtype=float)
    min_zenith_angles_rad = edge_zenith_angles_rad[:-1]
    max_zenith_angles_rad = edge_zenith_angles_rad[1:]

    edge_albedos = numpy.linspace(0, 1, num=num_albedo_bins + 1, dtype=float)
    min_albedos = edge_albedos[:-1]
    max_albedos = edge_albedos[1:]

    # Read data.
    print('Reading data from: "{0:s}"...\n'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)

    # Split by solar zenith angle.
    for k in range(num_zenith_angle_bins):
        this_prediction_dict = prediction_io.subset_by_zenith_angle(
            prediction_dict=copy.deepcopy(prediction_dict),
            min_zenith_angle_rad=min_zenith_angles_rad[k],
            max_zenith_angle_rad=max_zenith_angles_rad[k])

        this_output_file_name = prediction_io.find_file(
            directory_name=output_dir_name,
            zenith_angle_bin=k,
            raise_error_if_missing=False)
        print((
            'Writing {0:d} examples (with zenith angles {1:.4f}...{2:.4f} rad) '
            'to: "{3:s}"...').format(
                len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
                min_zenith_angles_rad[k], max_zenith_angles_rad[k],
                this_output_file_name))

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])

    print('\n')

    # Split by albedo.
    for k in range(num_albedo_bins):
        this_prediction_dict = prediction_io.subset_by_albedo(
            prediction_dict=copy.deepcopy(prediction_dict),
            min_albedo=min_albedos[k],
            max_albedo=max_albedos[k])

        this_output_file_name = prediction_io.find_file(
            directory_name=output_dir_name,
            albedo_bin=k,
            raise_error_if_missing=False)
        print(('Writing {0:d} examples (with albedos {1:.4f}...{2:.4f}) '
               'to: "{3:s}"...').format(
                   len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
                   min_albedos[k], max_albedos[k], this_output_file_name))

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])

    print('\n')

    # Split by month.
    for k in range(1, 13):
        this_prediction_dict = prediction_io.subset_by_month(
            prediction_dict=copy.deepcopy(prediction_dict), desired_month=k)

        this_output_file_name = prediction_io.find_file(
            directory_name=output_dir_name,
            month=k,
            raise_error_if_missing=False)
        print('Writing {0:d} examples to: "{1:s}"...'.format(
            len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
            this_output_file_name))

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])
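
The zenith-angle and albedo bins above come from one array of edges split into per-bin minima and maxima. A self-contained sketch of that edges-to-bins pattern (bin count illustrative):

import numpy

num_albedo_bins = 4
edge_albedos = numpy.linspace(0, 1, num=num_albedo_bins + 1, dtype=float)

min_albedos = edge_albedos[:-1]  # [0.   0.25 0.5  0.75]
max_albedos = edge_albedos[1:]   # [0.25 0.5  0.75 1.  ]
for k in range(num_albedo_bins):
    print('Bin {0:d}: {1:.2f}...{2:.2f}'.format(
        k, min_albedos[k], max_albedos[k]))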
Example #12

def _run(input_file_name, example_dir_name, for_ice, min_path_kg_m02,
         output_dir_name):
    """Splits predictions by cloud regime.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param example_dir_name: Same.
    :param for_ice: Same.
    :param min_path_kg_m02: Same.
    :param output_dir_name: Same.
    """

    print('Reading data from: "{0:s}"...\n'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)

    example_dict = misc_utils.get_raw_examples(
        example_file_name='',
        num_examples=int(1e12),
        example_dir_name=example_dir_name,
        example_id_file_name=input_file_name)
    print(SEPARATOR_STRING)

    cloud_layer_counts = example_utils.find_cloud_layers(
        example_dict=example_dict,
        min_path_kg_m02=min_path_kg_m02,
        for_ice=for_ice)[-1]

    unique_cloud_layer_counts, unique_example_counts = numpy.unique(
        cloud_layer_counts, return_counts=True)

    for i in range(len(unique_cloud_layer_counts)):
        print(
            ('Number of examples with {0:d} cloud layers '
             '({1:s}-water path >= {2:.1f} g m^-2) = {3:d}').format(
                 unique_cloud_layer_counts[i], 'ice' if for_ice else 'liquid',
                 KG_TO_GRAMS * min_path_kg_m02, unique_example_counts[i]))

    print(SEPARATOR_STRING)

    num_output_files = len(MIN_LAYERS_BY_FILE)

    for k in range(num_output_files):
        these_indices = numpy.where(
            numpy.logical_and(cloud_layer_counts >= MIN_LAYERS_BY_FILE[k],
                              cloud_layer_counts <= MAX_LAYERS_BY_FILE[k]))[0]

        this_prediction_dict = prediction_io.subset_by_index(
            prediction_dict=copy.deepcopy(prediction_dict),
            desired_indices=these_indices)

        this_output_file_name = '{0:s}/predictions_{1:s}.nc'.format(
            output_dir_name, FILE_SUFFIXES[k])
        print('Writing {0:d} examples to: "{1:s}"...'.format(
            len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
            this_output_file_name))

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])
Example #13
def _augment_eval_table(result_table_xarray):
    """Augments evaluation table.

    Specifically, adds number of examples and skewness for each target variable.

    :param result_table_xarray: Table returned by `evaluation.read_file`.
    :return: result_table_xarray: Same but with number of examples and skewness
        for each target variable.
    """

    prediction_file_name = (
        result_table_xarray.attrs[evaluation.PREDICTION_FILE_KEY])

    print('Reading data from: "{0:s}"...'.format(prediction_file_name))
    prediction_dict = prediction_io.read_file(prediction_file_name)

    num_examples = len(prediction_dict[prediction_io.EXAMPLE_IDS_KEY])
    result_table_xarray.attrs[NUM_EXAMPLES_KEY] = num_examples

    scalar_target_matrix = prediction_dict[prediction_io.SCALAR_TARGETS_KEY]
    if scalar_target_matrix.size == 0:
        scalar_skewness_matrix = numpy.full(0, 0.)
    else:
        scalar_skewness_matrix = scipy.stats.skew(scalar_target_matrix,
                                                  axis=0,
                                                  bias=False,
                                                  nan_policy='omit')

    these_dim = (evaluation.SCALAR_FIELD_DIM, )
    result_table_xarray.update(
        {SCALAR_SKEWNESS_KEY: (these_dim, scalar_skewness_matrix)})

    vector_skewness_matrix = scipy.stats.skew(
        prediction_dict[prediction_io.VECTOR_TARGETS_KEY],
        axis=0,
        bias=False,
        nan_policy='omit')

    these_dim = (evaluation.HEIGHT_DIM, evaluation.VECTOR_FIELD_DIM)
    result_table_xarray.update(
        {VECTOR_SKEWNESS_KEY: (these_dim, vector_skewness_matrix)})

    try:
        _ = result_table_xarray.coords[evaluation.AUX_TARGET_FIELD_DIM].values
    except KeyError:
        return result_table_xarray

    example_dict = {
        example_utils.SCALAR_TARGET_NAMES_KEY:
        result_table_xarray.coords[evaluation.SCALAR_FIELD_DIM].values,
        example_utils.VECTOR_TARGET_NAMES_KEY:
        result_table_xarray.coords[evaluation.VECTOR_FIELD_DIM].values,
        example_utils.HEIGHTS_KEY:
        numpy.round(result_table_xarray.coords[
            evaluation.HEIGHT_DIM].values).astype(int)
    }

    aux_target_matrix = evaluation.get_aux_fields(
        prediction_dict=prediction_dict,
        example_dict=example_dict)[evaluation.AUX_TARGET_VALS_KEY]

    aux_skewness_matrix = scipy.stats.skew(aux_target_matrix,
                                           axis=0,
                                           bias=False,
                                           nan_policy='omit')

    these_dim = (evaluation.AUX_TARGET_FIELD_DIM, )
    result_table_xarray.update(
        {AUX_SKEWNESS_KEY: (these_dim, aux_skewness_matrix)})

    return result_table_xarray
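
On the statistics used here: `scipy.stats.skew` with `bias=False` applies the sample-size correction, and `nan_policy='omit'` ignores missing values along the reduction axis. A self-contained sketch:

import numpy
import scipy.stats

# 4 examples x 2 target variables, with one missing value.
target_matrix = numpy.array([
    [1., 10.],
    [2., 20.],
    [3., numpy.nan],
    [10., 40.],
])
skewness_values = scipy.stats.skew(
    target_matrix, axis=0, bias=False, nan_policy='omit')
print(skewness_values)  # one bias-corrected skewness per column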
Example #14
def _run(prediction_file_names, separate_by_height, output_dir_name):
    """Trains isotonic-regression model.

    This is effectively the main method.

    :param prediction_file_names: See documentation at top of file.
    :param separate_by_height: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if predictions in `prediction_file_names` were made
        with isotonic regression.
    """

    num_files = len(prediction_file_names)
    prediction_dicts = [dict()] * num_files

    for i in range(num_files):
        print('Reading original predictions from: "{0:s}"...'.format(
            prediction_file_names[i]))
        prediction_dicts[i] = prediction_io.read_file(prediction_file_names[i])

    prediction_dict = prediction_io.concat_predictions(prediction_dicts)

    if prediction_dict[prediction_io.ISOTONIC_MODEL_FILE_KEY] is not None:
        raise ValueError(
            'Predictions used for training isotonic regression must be made'
            ' with base model only (i.e., must not already include isotonic'
            ' regression).')

    orig_vector_prediction_matrix = (
        None if prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY].size == 0
        else prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY])
    vector_target_matrix = (
        None if prediction_dict[prediction_io.VECTOR_TARGETS_KEY].size == 0
        else prediction_dict[prediction_io.VECTOR_TARGETS_KEY])
    orig_scalar_prediction_matrix = (
        None if prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY].size == 0
        else prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY])
    scalar_target_matrix = (
        None if prediction_dict[prediction_io.SCALAR_TARGETS_KEY].size == 0
        else prediction_dict[prediction_io.SCALAR_TARGETS_KEY])

    print(SEPARATOR_STRING)
    scalar_model_objects, vector_model_object_matrix = (
        isotonic_regression.train_models(
            orig_vector_prediction_matrix=orig_vector_prediction_matrix,
            orig_scalar_prediction_matrix=orig_scalar_prediction_matrix,
            vector_target_matrix=vector_target_matrix,
            scalar_target_matrix=scalar_target_matrix,
            separate_by_height=separate_by_height))
    print(SEPARATOR_STRING)

    output_file_name = isotonic_regression.find_file(
        model_dir_name=output_dir_name, raise_error_if_missing=False)

    print('Writing isotonic-regression models to: "{0:s}"...'.format(
        output_file_name))

    isotonic_regression.write_file(
        dill_file_name=output_file_name,
        scalar_model_objects=scalar_model_objects,
        vector_model_object_matrix=vector_model_object_matrix)
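
The `isotonic_regression` module is project-specific, but the underlying technique can be sketched with scikit-learn's `IsotonicRegression`: fit a monotonic mapping from base-model predictions to targets, then apply it to calibrate new predictions (a sketch assuming scikit-learn, not the project's wrapper):

import numpy
from sklearn.isotonic import IsotonicRegression

# Base-model predictions and matching targets (training set).
orig_predictions = numpy.array([0.1, 0.4, 0.35, 0.8, 0.7])
targets = numpy.array([0.0, 0.3, 0.4, 1.0, 0.9])

model_object = IsotonicRegression(out_of_bounds='clip')
model_object.fit(orig_predictions, targets)

# Calibrate new predictions with the fitted monotonic mapping.
new_predictions = model_object.predict(numpy.array([0.2, 0.5, 0.9]))
print(new_predictions)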