def _run(input_file_name, top_output_dir_name):
    """Splits predictions by site (point location).

    This is effectively the main method.

    One file ("<top_output_dir_name>/<site name>/predictions.nc") is written
    for every site in `SITE_NAME_TO_LATLNG` that has at least one example.
    Sites with no examples are logged but produce no file.

    :param input_file_name: See documentation at top of file.
    :param top_output_dir_name: Same.
    :raises: ValueError: if any example cannot be assigned to a site.
    """

    # Read data.
    print('Reading data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)

    example_metadata_dict = example_utils.parse_example_ids(
        prediction_dict[prediction_io.EXAMPLE_IDS_KEY]
    )

    # Example coordinates are rounded to the same tolerance used for matching
    # below, then passed through `convert_lng_positive_in_west`.
    # NOTE(review): site longitudes in SITE_NAME_TO_LATLNG are presumably in
    # the same longitude convention -- confirm against the constant.
    example_latitudes_deg_n = number_rounding.round_to_nearest(
        example_metadata_dict[example_utils.LATITUDES_KEY],
        LATLNG_TOLERANCE_DEG
    )
    example_longitudes_deg_e = number_rounding.round_to_nearest(
        example_metadata_dict[example_utils.LONGITUDES_KEY],
        LATLNG_TOLERANCE_DEG
    )
    example_longitudes_deg_e = lng_conversion.convert_lng_positive_in_west(
        example_longitudes_deg_e
    )

    num_examples = len(example_latitudes_deg_n)
    example_written_flags = numpy.full(num_examples, False, dtype=bool)

    for this_site_name, this_site_latlng in SITE_NAME_TO_LATLNG.items():
        this_site_latitude_deg_n = this_site_latlng[0]
        this_site_longitude_deg_e = this_site_latlng[1]

        latitude_match_flags = numpy.absolute(
            example_latitudes_deg_n - this_site_latitude_deg_n
        ) <= LATLNG_TOLERANCE_DEG

        longitude_match_flags = numpy.absolute(
            example_longitudes_deg_e - this_site_longitude_deg_e
        ) <= LATLNG_TOLERANCE_DEG

        these_indices = numpy.where(
            numpy.logical_and(latitude_match_flags, longitude_match_flags)
        )[0]

        this_output_file_name = '{0:s}/{1:s}/predictions.nc'.format(
            top_output_dir_name, this_site_name
        )
        print('Writing {0:d} examples to: "{1:s}"...'.format(
            len(these_indices), this_output_file_name
        ))

        # Bail out before the (expensive) deep copy of the full prediction
        # dictionary when this site has nothing to write.
        if len(these_indices) == 0:
            continue

        example_written_flags[these_indices] = True

        # Subset a deep copy, so that the full dictionary remains intact for
        # the next site.
        this_prediction_dict = prediction_io.subset_by_index(
            prediction_dict=copy.deepcopy(prediction_dict),
            desired_indices=these_indices
        )

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY]
        )

    if numpy.all(example_written_flags):
        return

    num_unassigned_examples = int(
        numpy.sum(numpy.invert(example_written_flags))
    )

    error_string = (
        '{0:d} of {1:d} examples could not be assigned to a site.  This is a '
        'BIG PROBLEM.'
    ).format(num_unassigned_examples, num_examples)

    raise ValueError(error_string)
def _run(input_file_name, min_latitude_deg, max_latitude_deg,
         min_longitude_deg, max_longitude_deg, latitude_spacing_deg,
         longitude_spacing_deg, output_dir_name):
    """Splits predictions by spatial region.

    This is effectively the main method.

    A lat-long grid is built over the requested domain; one prediction file is
    written for every grid cell that contains at least one example, followed by
    one metafile describing the grid.

    :param input_file_name: See documentation at top of file.
    :param min_latitude_deg: Same.
    :param max_latitude_deg: Same.
    :param min_longitude_deg: Same.
    :param max_longitude_deg: Same.
    :param latitude_spacing_deg: Same.
    :param longitude_spacing_deg: Same.
    :param output_dir_name: Same.
    """

    # Read data.
    print('Reading data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)

    example_metadata_dict = example_utils.parse_example_ids(
        prediction_dict[prediction_io.EXAMPLE_IDS_KEY]
    )
    example_latitudes_deg = example_metadata_dict[example_utils.LATITUDES_KEY]
    example_longitudes_deg = example_metadata_dict[example_utils.LONGITUDES_KEY]

    these_limits_deg = numpy.array([
        min_latitude_deg, max_latitude_deg,
        min_longitude_deg, max_longitude_deg
    ])

    # If *any* of the four limits is NaN, *all* four are replaced with the
    # bounding box of the examples themselves.
    if numpy.any(numpy.isnan(these_limits_deg)):
        min_latitude_deg = numpy.min(example_latitudes_deg)
        max_latitude_deg = numpy.max(example_latitudes_deg)
        min_longitude_deg = numpy.min(example_longitudes_deg)
        max_longitude_deg = numpy.max(example_longitudes_deg)

    # Create grid.
    grid_point_latitudes_deg, grid_point_longitudes_deg = (
        misc.create_latlng_grid(
            min_latitude_deg=min_latitude_deg,
            max_latitude_deg=max_latitude_deg,
            latitude_spacing_deg=latitude_spacing_deg,
            min_longitude_deg=min_longitude_deg,
            max_longitude_deg=max_longitude_deg,
            longitude_spacing_deg=longitude_spacing_deg
        )
    )

    num_grid_rows = len(grid_point_latitudes_deg)
    num_grid_columns = len(grid_point_longitudes_deg)

    # Spacing is taken from the first two grid points along each axis, so this
    # requires at least two rows and two columns.
    grid_edge_latitudes_deg, grid_edge_longitudes_deg = (
        grids.get_latlng_grid_cell_edges(
            min_latitude_deg=grid_point_latitudes_deg[0],
            min_longitude_deg=grid_point_longitudes_deg[0],
            lat_spacing_deg=numpy.diff(grid_point_latitudes_deg[:2])[0],
            lng_spacing_deg=numpy.diff(grid_point_longitudes_deg[:2])[0],
            num_rows=num_grid_rows, num_columns=num_grid_columns
        )
    )

    print(SEPARATOR_STRING)

    for i in range(num_grid_rows):
        for j in range(num_grid_columns):

            # The helper's argument names say "metres", but it is fed degrees
            # here: x = longitude, y = latitude.
            these_indices = grids.find_events_in_grid_cell(
                event_x_coords_metres=example_longitudes_deg,
                event_y_coords_metres=example_latitudes_deg,
                grid_edge_x_coords_metres=grid_edge_longitudes_deg,
                grid_edge_y_coords_metres=grid_edge_latitudes_deg,
                row_index=i, column_index=j, verbose=False
            )

            # Skip empty cells *before* deep-copying the whole prediction
            # dictionary; on a large grid most cells may be empty.
            # NOTE(review): assumes `subset_by_index` keeps exactly the
            # examples in `desired_indices` -- confirm.
            if len(these_indices) == 0:
                continue

            this_prediction_dict = prediction_io.subset_by_index(
                prediction_dict=copy.deepcopy(prediction_dict),
                desired_indices=these_indices
            )

            this_output_file_name = prediction_io.find_file(
                directory_name=output_dir_name, grid_row=i, grid_column=j,
                raise_error_if_missing=False
            )
            print('Writing {0:d} examples to: "{1:s}"...'.format(
                len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
                this_output_file_name
            ))

            prediction_io.write_file(
                netcdf_file_name=this_output_file_name,
                scalar_target_matrix=
                this_prediction_dict[prediction_io.SCALAR_TARGETS_KEY],
                vector_target_matrix=
                this_prediction_dict[prediction_io.VECTOR_TARGETS_KEY],
                scalar_prediction_matrix=
                this_prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY],
                vector_prediction_matrix=
                this_prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY],
                heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
                example_id_strings=
                this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
                model_file_name=
                this_prediction_dict[prediction_io.MODEL_FILE_KEY]
            )

    print(SEPARATOR_STRING)

    grid_metafile_name = prediction_io.find_grid_metafile(
        prediction_dir_name=output_dir_name, raise_error_if_missing=False
    )

    print('Writing grid metadata to: "{0:s}"...'.format(grid_metafile_name))
    prediction_io.write_grid_metafile(
        grid_point_latitudes_deg=grid_point_latitudes_deg,
        grid_point_longitudes_deg=grid_point_longitudes_deg,
        netcdf_file_name=grid_metafile_name
    )
def _run(input_prediction_file_name, num_examples_per_set, output_dir_name):
    """Finds best and worst net-flux predictions.

    This is effectively the main method.

    "Net flux" here is the scalar target named
    `example_utils.SHORTWAVE_SURFACE_DOWN_FLUX_NAME` minus the scalar target
    named `example_utils.SHORTWAVE_TOA_UP_FLUX_NAME`.  Five files are written
    to `output_dir_name`: examples with greatest positive bias, greatest
    negative bias, smallest absolute error, greatest actual net flux, and
    smallest actual net flux.

    :param input_prediction_file_name: See documentation at top of file.
    :param num_examples_per_set: Same.
    :param output_dir_name: Same.
    """

    error_checking.assert_is_greater(num_examples_per_set, 0)

    print('Reading data from: "{0:s}"...'.format(input_prediction_file_name))
    prediction_dict = prediction_io.read_file(input_prediction_file_name)

    def _write_subset(desired_indices, file_name_suffix, description):
        """Writes one subset of the predictions to its own NetCDF file.

        :param desired_indices: Indices of examples to keep.
        :param file_name_suffix: File will be named
            "<output_dir_name>/predictions_<file_name_suffix>.nc".
        :param description: Description of subset, used only in log message.
        """

        # Subset a deep copy, so that the full dictionary remains intact for
        # the next subset.
        subset_prediction_dict = prediction_io.subset_by_index(
            prediction_dict=copy.deepcopy(prediction_dict),
            desired_indices=desired_indices
        )

        output_file_name = '{0:s}/predictions_{1:s}.nc'.format(
            output_dir_name, file_name_suffix
        )
        print('Writing examples with {0:s} to: "{1:s}"...'.format(
            description, output_file_name
        ))

        prediction_io.write_file(
            netcdf_file_name=output_file_name,
            scalar_target_matrix=subset_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=subset_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=subset_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=subset_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=subset_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=subset_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=subset_prediction_dict[
                prediction_io.MODEL_FILE_KEY]
        )

    # Model metadata is needed only to find which scalar-target channels hold
    # the two fluxes.
    model_file_name = prediction_dict[prediction_io.MODEL_FILE_KEY]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0]
    )

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name)
    )
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]

    scalar_target_names = (
        generator_option_dict[neural_net.SCALAR_TARGET_NAMES_KEY]
    )
    down_index = scalar_target_names.index(
        example_utils.SHORTWAVE_SURFACE_DOWN_FLUX_NAME
    )
    up_index = scalar_target_names.index(
        example_utils.SHORTWAVE_TOA_UP_FLUX_NAME
    )

    scalar_target_matrix = prediction_dict[prediction_io.SCALAR_TARGETS_KEY]
    scalar_prediction_matrix = (
        prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY]
    )

    targets_w_m02 = (
        scalar_target_matrix[..., down_index] -
        scalar_target_matrix[..., up_index]
    )
    predictions_w_m02 = (
        scalar_prediction_matrix[..., down_index] -
        scalar_prediction_matrix[..., up_index]
    )

    # An axis is inserted at position 1 before the biases are passed to
    # `find_best_and_worst_predictions`.
    biases_w_m02 = predictions_w_m02 - targets_w_m02
    bias_matrix = numpy.expand_dims(biases_w_m02, axis=1)

    print(SEPARATOR_STRING)
    high_bias_indices, low_bias_indices, low_abs_error_indices = (
        misc_utils.find_best_and_worst_predictions(
            bias_matrix=bias_matrix,
            absolute_error_matrix=numpy.absolute(bias_matrix),
            num_examples_per_set=num_examples_per_set
        )
    )
    print(SEPARATOR_STRING)

    _write_subset(
        desired_indices=high_bias_indices,
        file_name_suffix='high-bias',
        description='greatest positive bias'
    )
    _write_subset(
        desired_indices=low_bias_indices,
        file_name_suffix='low-bias',
        description='greatest negative bias'
    )
    _write_subset(
        desired_indices=low_abs_error_indices,
        file_name_suffix='low-absolute-error',
        description='smallest absolute error'
    )

    # Rank by *actual* (target) net flux, descending then ascending.
    sort_indices = numpy.argsort(-1 * targets_w_m02)
    _write_subset(
        desired_indices=sort_indices[:num_examples_per_set],
        file_name_suffix='large-net-flux',
        description='greatest net flux'
    )

    sort_indices = numpy.argsort(targets_w_m02)
    _write_subset(
        desired_indices=sort_indices[:num_examples_per_set],
        file_name_suffix='small-net-flux',
        description='smallest net flux'
    )
def _run(prediction_file_name, num_examples, example_dir_name, use_log_scale,
         output_dir_name):
    """Plots predicted vs. actual (target) profiles for each example.

    This is effectively the main method.

    :param prediction_file_name: See documentation at top of file.
    :param num_examples: Same.  Values < 1 mean "use all examples".
    :param example_dir_name: Same.  Empty string is treated as None.
    :param use_log_scale: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name
    )

    # Turn sentinel values from the command line into None.
    num_examples = None if num_examples < 1 else num_examples
    example_dir_name = None if example_dir_name == '' else example_dir_name

    print(('Reading predicted and actual (target) profiles from: "{0:s}"...'
           ).format(prediction_file_name))
    prediction_dict = prediction_io.read_file(prediction_file_name)

    num_examples_in_file = len(prediction_dict[prediction_io.EXAMPLE_IDS_KEY])
    need_subset = (
        num_examples is not None and num_examples < num_examples_in_file
    )

    if need_subset:
        # Keep only the first `num_examples` examples
        # (indices 0 ... num_examples - 1).
        prediction_dict = prediction_io.subset_by_index(
            prediction_dict=prediction_dict,
            desired_indices=numpy.arange(num_examples, dtype=int)
        )

    example_id_strings = prediction_dict[prediction_io.EXAMPLE_IDS_KEY]
    scalar_target_matrix = prediction_dict[prediction_io.SCALAR_TARGETS_KEY]
    scalar_prediction_matrix = (
        prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY]
    )
    vector_target_matrix = prediction_dict[prediction_io.VECTOR_TARGETS_KEY]
    vector_prediction_matrix = (
        prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY]
    )

    model_dir_name = os.path.split(
        prediction_dict[prediction_io.MODEL_FILE_KEY]
    )[0]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=model_dir_name, raise_error_if_missing=True
    )

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name)
    )
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)

    # Heights in the metadata are overwritten with those in the prediction
    # file.
    training_option_dict = model_metadata_dict[neural_net.TRAINING_OPTIONS_KEY]
    training_option_dict[neural_net.HEIGHTS_KEY] = (
        prediction_dict[prediction_io.HEIGHTS_KEY]
    )

    # If necessary, convert flux increments to fluxes.
    vector_target_matrix, vector_prediction_matrix, model_metadata_dict = (
        _fluxes_increments_to_actual(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            model_metadata_dict=model_metadata_dict
        )
    )

    # If necessary, convert fluxes to heating rates.
    vector_target_matrix, vector_prediction_matrix, model_metadata_dict = (
        _fluxes_to_heating_rate(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            model_metadata_dict=model_metadata_dict,
            prediction_file_name=prediction_file_name,
            example_dir_name=example_dir_name
        )
    )

    # If data include both upwelling and downwelling fluxes, remove flux
    # increments (they need not be plotted).
    vector_target_matrix, vector_prediction_matrix, model_metadata_dict = (
        _remove_flux_increments(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            model_metadata_dict=model_metadata_dict
        )
    )

    vector_target_names = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY
    ][neural_net.VECTOR_TARGET_NAMES_KEY]

    # The "fancy" plot is used only when every default vector target is
    # available.
    have_all_default_targets = all(
        t in vector_target_names for t in DEFAULT_VECTOR_TARGET_NAMES
    )

    if not have_all_default_targets:
        title_strings = _get_flux_strings(
            scalar_target_matrix=scalar_target_matrix,
            scalar_prediction_matrix=scalar_prediction_matrix,
            model_metadata_dict=model_metadata_dict
        )

        _plot_comparisons_simple(
            vector_target_matrix=vector_target_matrix,
            vector_prediction_matrix=vector_prediction_matrix,
            example_id_strings=example_id_strings,
            model_metadata_dict=model_metadata_dict,
            use_log_scale=use_log_scale, title_strings=title_strings,
            output_dir_name=output_dir_name
        )
        return

    _plot_comparisons_fancy(
        vector_target_matrix=vector_target_matrix,
        vector_prediction_matrix=vector_prediction_matrix,
        example_id_strings=example_id_strings,
        model_metadata_dict=model_metadata_dict,
        use_log_scale=use_log_scale, output_dir_name=output_dir_name
    )
def _run(input_prediction_file_name, average_over_height, scale_by_climo,
         num_examples_per_set, output_dir_name):
    """Finds best and worst heating-rate predictions.

    This is effectively the main method.

    Always writes three files to `output_dir_name` (greatest positive bias,
    greatest negative bias, smallest absolute error).  Unless scaling by
    climatology is in effect, also writes the examples with greatest actual
    heating rate; if `average_over_height` is True, additionally writes the
    examples with smallest actual heating rate.

    :param input_prediction_file_name: See documentation at top of file.
    :param average_over_height: Same.
    :param scale_by_climo: Same.  Ignored (treated as False) when
        `average_over_height` is True.
    :param num_examples_per_set: Same.
    :param output_dir_name: Same.
    """

    # TODO(thunderhoser): Maybe allow specific height again (e.g., 15 km).

    error_checking.assert_is_greater(num_examples_per_set, 0)
    scale_by_climo = scale_by_climo and not average_over_height

    print('Reading data from: "{0:s}"...'.format(input_prediction_file_name))
    prediction_dict = prediction_io.read_file(input_prediction_file_name)

    def _write_subset(desired_indices, file_name_suffix, description):
        """Writes one subset of the predictions to its own NetCDF file.

        :param desired_indices: Indices of examples to keep.
        :param file_name_suffix: File will be named
            "<output_dir_name>/predictions_<file_name_suffix>.nc".
        :param description: Description of subset, used only in log message.
        """

        # Subset a deep copy, so that the full dictionary remains intact for
        # the next subset.
        subset_prediction_dict = prediction_io.subset_by_index(
            prediction_dict=copy.deepcopy(prediction_dict),
            desired_indices=desired_indices
        )

        output_file_name = '{0:s}/predictions_{1:s}.nc'.format(
            output_dir_name, file_name_suffix
        )
        print('Writing examples with {0:s} to: "{1:s}"...'.format(
            description, output_file_name
        ))

        prediction_io.write_file(
            netcdf_file_name=output_file_name,
            scalar_target_matrix=subset_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=subset_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=subset_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=subset_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=subset_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=subset_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=subset_prediction_dict[
                prediction_io.MODEL_FILE_KEY]
        )

    model_file_name = prediction_dict[prediction_io.MODEL_FILE_KEY]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0]
    )

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name)
    )
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]

    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY]
    )
    hr_index = vector_target_names.index(
        example_utils.SHORTWAVE_HEATING_RATE_NAME
    )

    target_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_TARGETS_KEY][..., hr_index]
    )
    prediction_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY][..., hr_index]
    )

    bias_matrix = prediction_matrix_k_day01 - target_matrix_k_day01
    absolute_error_matrix = numpy.absolute(bias_matrix)

    if average_over_height:
        # Axis 1 is averaged out but kept as a length-1 axis.
        bias_matrix = numpy.mean(bias_matrix, axis=1, keepdims=True)
        absolute_error_matrix = numpy.mean(
            absolute_error_matrix, axis=1, keepdims=True
        )

    if scale_by_climo:
        # Divide errors by the mean training-set heating rate returned by
        # `create_mean_example`, making them relative.
        # NOTE(review): no guard against zero climatological values here.
        normalization_file_name = (
            generator_option_dict[neural_net.NORMALIZATION_FILE_KEY]
        )

        print(('Reading training examples (for climatology) from: "{0:s}"...'
               ).format(normalization_file_name))

        training_example_dict = example_io.read_file(normalization_file_name)
        training_example_dict = example_utils.subset_by_field(
            example_dict=training_example_dict,
            field_names=[example_utils.SHORTWAVE_HEATING_RATE_NAME]
        )
        training_example_dict = example_utils.subset_by_height(
            example_dict=training_example_dict,
            heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY]
        )

        dummy_example_dict = {
            example_utils.SCALAR_PREDICTOR_NAMES_KEY: [],
            example_utils.VECTOR_PREDICTOR_NAMES_KEY: [],
            example_utils.SCALAR_TARGET_NAMES_KEY: [],
            example_utils.VECTOR_TARGET_NAMES_KEY:
                [example_utils.SHORTWAVE_HEATING_RATE_NAME],
            example_utils.HEIGHTS_KEY:
                generator_option_dict[neural_net.HEIGHTS_KEY]
        }

        mean_training_example_dict = normalization.create_mean_example(
            new_example_dict=dummy_example_dict,
            training_example_dict=training_example_dict
        )
        climo_matrix_k_day01 = mean_training_example_dict[
            example_utils.VECTOR_TARGET_VALS_KEY][..., 0]

        bias_matrix = bias_matrix / climo_matrix_k_day01
        absolute_error_matrix = absolute_error_matrix / climo_matrix_k_day01

    print(SEPARATOR_STRING)
    high_bias_indices, low_bias_indices, low_abs_error_indices = (
        misc_utils.find_best_and_worst_predictions(
            bias_matrix=bias_matrix,
            absolute_error_matrix=absolute_error_matrix,
            num_examples_per_set=num_examples_per_set
        )
    )
    print(SEPARATOR_STRING)

    _write_subset(
        desired_indices=high_bias_indices,
        file_name_suffix='high-bias',
        description='greatest positive bias'
    )
    _write_subset(
        desired_indices=low_bias_indices,
        file_name_suffix='low-bias',
        description='greatest negative bias'
    )
    _write_subset(
        desired_indices=low_abs_error_indices,
        file_name_suffix='low-absolute-error',
        description='smallest absolute error'
    )

    # The sets ranked by actual heating rate are skipped when scaling by
    # climatology.
    if scale_by_climo:
        return

    # One value per example: the mean over axis 1 if averaging over height,
    # otherwise the max over axis 1.
    if average_over_height:
        summary_targets_k_day01 = numpy.mean(target_matrix_k_day01, axis=1)
    else:
        summary_targets_k_day01 = numpy.max(target_matrix_k_day01, axis=1)

    sort_indices = numpy.argsort(-1 * summary_targets_k_day01)
    _write_subset(
        desired_indices=sort_indices[:num_examples_per_set],
        file_name_suffix='large-heating-rate',
        description='greatest heating rate'
    )

    # The smallest-heating-rate set is written only when averaging over
    # height.
    if not average_over_height:
        return

    sort_indices = numpy.argsort(summary_targets_k_day01)
    _write_subset(
        desired_indices=sort_indices[:num_examples_per_set],
        file_name_suffix='small-heating-rate',
        description='smallest heating rate'
    )
def _run(input_file_name, example_dir_name, for_ice, min_path_kg_m02,
         output_dir_name):
    """Splits predictions by cloud regime (number of cloud layers).

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param example_dir_name: Same.
    :param for_ice: Same.
    :param min_path_kg_m02: Same.
    :param output_dir_name: Same.
    """

    print('Reading data from: "{0:s}"...\n'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)

    # The prediction file doubles as the source of example IDs for the raw
    # examples.
    example_dict = misc_utils.get_raw_examples(
        example_file_name='', num_examples=int(1e12),
        example_dir_name=example_dir_name,
        example_id_file_name=input_file_name
    )
    print(SEPARATOR_STRING)

    # Last item returned by `find_cloud_layers` is the layer count per
    # example.
    cloud_layer_outputs = example_utils.find_cloud_layers(
        example_dict=example_dict, min_path_kg_m02=min_path_kg_m02,
        for_ice=for_ice
    )
    cloud_layer_counts = cloud_layer_outputs[-1]

    unique_cloud_layer_counts, unique_example_counts = numpy.unique(
        cloud_layer_counts, return_counts=True
    )

    # Log a histogram of layer counts.
    for this_layer_count, this_example_count in zip(
            unique_cloud_layer_counts, unique_example_counts
    ):
        print((
            'Number of examples with {0:d} cloud layers '
            '({1:s}-water path >= {2:.1f} g m^-2) = {3:d}'
        ).format(
            this_layer_count,
            'ice' if for_ice else 'liquid',
            KG_TO_GRAMS * min_path_kg_m02,
            this_example_count
        ))

    print(SEPARATOR_STRING)

    # One output file per [min, max] layer-count range.
    for k, this_min_layer_count in enumerate(MIN_LAYERS_BY_FILE):
        enough_layers_flags = cloud_layer_counts >= this_min_layer_count
        few_enough_layers_flags = cloud_layer_counts <= MAX_LAYERS_BY_FILE[k]

        these_indices = numpy.where(
            numpy.logical_and(enough_layers_flags, few_enough_layers_flags)
        )[0]

        # Subset a deep copy, so that the full dictionary remains intact for
        # the next regime.
        this_prediction_dict = prediction_io.subset_by_index(
            prediction_dict=copy.deepcopy(prediction_dict),
            desired_indices=these_indices
        )

        this_output_file_name = '{0:s}/predictions_{1:s}.nc'.format(
            output_dir_name, FILE_SUFFIXES[k]
        )
        this_num_examples = len(
            this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]
        )
        print('Writing {0:d} examples to: "{1:s}"...'.format(
            this_num_examples, this_output_file_name
        ))

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY]
        )