def _run(input_file_name, top_tracking_dir_name, min_latitude_deg,
         max_latitude_deg, min_longitude_deg, max_longitude_deg,
         grid_spacing_metres, output_dir_name):
    """Subsets ungridded predictions by space.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param top_tracking_dir_name: Same.
    :param min_latitude_deg: Same.
    :param max_latitude_deg: Same.
    :param min_longitude_deg: Same.
    :param max_longitude_deg: Same.
    :param grid_spacing_metres: Same.
    :param output_dir_name: Same.
    """

    equidistant_grid_dict = grids.create_equidistant_grid(
        min_latitude_deg=min_latitude_deg, max_latitude_deg=max_latitude_deg,
        min_longitude_deg=min_longitude_deg,
        max_longitude_deg=max_longitude_deg,
        x_spacing_metres=grid_spacing_metres,
        y_spacing_metres=grid_spacing_metres, azimuthal=False)

    grid_metafile_name = grids.find_equidistant_metafile(
        directory_name=output_dir_name, raise_error_if_missing=False)

    print('Writing metadata for equidistant grid to: "{0:s}"...'.format(
        grid_metafile_name
    ))
    grids.write_equidistant_metafile(grid_dict=equidistant_grid_dict,
                                     pickle_file_name=grid_metafile_name)

    grid_point_x_coords_metres = equidistant_grid_dict[grids.X_COORDS_KEY]
    grid_point_y_coords_metres = equidistant_grid_dict[grids.Y_COORDS_KEY]
    projection_object = equidistant_grid_dict[grids.PROJECTION_KEY]

    grid_edge_x_coords_metres = numpy.append(
        grid_point_x_coords_metres - 0.5 * grid_spacing_metres,
        grid_point_x_coords_metres[-1] + 0.5 * grid_spacing_metres
    )
    grid_edge_y_coords_metres = numpy.append(
        grid_point_y_coords_metres - 0.5 * grid_spacing_metres,
        grid_point_y_coords_metres[-1] + 0.5 * grid_spacing_metres
    )

    print('Reading input data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(input_file_name)
    print(SEPARATOR_STRING)

    full_id_strings = prediction_dict[prediction_io.STORM_IDS_KEY]
    storm_times_unix_sec = prediction_dict[prediction_io.STORM_TIMES_KEY]
    unique_storm_times_unix_sec = numpy.unique(storm_times_unix_sec)

    num_storm_objects = len(storm_times_unix_sec)
    storm_latitudes_deg = numpy.full(num_storm_objects, numpy.nan)
    storm_longitudes_deg = numpy.full(num_storm_objects, numpy.nan)

    for this_time_unix_sec in unique_storm_times_unix_sec:
        these_indices = numpy.where(
            storm_times_unix_sec == this_time_unix_sec
        )[0]

        these_full_id_strings = [full_id_strings[k] for k in these_indices]

        (storm_latitudes_deg[these_indices],
         storm_longitudes_deg[these_indices]
        ) = _read_storm_locations_one_time(
            top_tracking_dir_name=top_tracking_dir_name,
            valid_time_unix_sec=this_time_unix_sec,
            desired_full_id_strings=these_full_id_strings)

    print(SEPARATOR_STRING)

    storm_x_coords_metres, storm_y_coords_metres = (
        projections.project_latlng_to_xy(
            latitudes_deg=storm_latitudes_deg,
            longitudes_deg=storm_longitudes_deg,
            projection_object=projection_object)
    )

    num_grid_rows = len(grid_point_y_coords_metres)
    num_grid_columns = len(grid_point_x_coords_metres)

    for i in range(num_grid_rows):
        for j in range(num_grid_columns):
            these_indices = grids.find_events_in_grid_cell(
                event_x_coords_metres=storm_x_coords_metres,
                event_y_coords_metres=storm_y_coords_metres,
                grid_edge_x_coords_metres=grid_edge_x_coords_metres,
                grid_edge_y_coords_metres=grid_edge_y_coords_metres,
                row_index=i, column_index=j, verbose=True)

            if len(these_indices) == 0:
                continue

            this_prediction_dict = prediction_io.subset_ungridded_predictions(
                prediction_dict=prediction_dict,
                desired_storm_indices=these_indices)

            this_output_file_name = prediction_io.find_ungridded_file(
                directory_name=output_dir_name, grid_row=i, grid_column=j,
                raise_error_if_missing=False)

            print('Writing subset to: "{0:s}"...'.format(
                this_output_file_name))

            prediction_io.write_ungridded_predictions(
                netcdf_file_name=this_output_file_name,
                class_probability_matrix=this_prediction_dict[
                    prediction_io.PROBABILITY_MATRIX_KEY],
                storm_ids=this_prediction_dict[prediction_io.STORM_IDS_KEY],
                storm_times_unix_sec=this_prediction_dict[
                    prediction_io.STORM_TIMES_KEY],
                observed_labels=this_prediction_dict[
                    prediction_io.OBSERVED_LABELS_KEY],
                target_name=this_prediction_dict[
                    prediction_io.TARGET_NAME_KEY],
                model_file_name=this_prediction_dict[
                    prediction_io.MODEL_FILE_KEY]
            )

            print('\n')
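
# The sketch below is not part of the original script; it just illustrates
# how `_run` derives cell edges from cell centers before binning storm
# objects. For grid points at x = [0, 10000, 20000] m with 10-km spacing,
# the edges are the points shifted left by half a cell, plus one extra edge
# past the last point. The coordinate values here are made up.
def _demo_grid_edges():
    """Illustrates the grid-point-to-grid-edge conversion used in `_run`."""
    import numpy

    grid_spacing_metres = 10000.
    grid_point_x_coords_metres = numpy.array([0., 10000., 20000.])

    # Same arithmetic as in `_run` above.
    grid_edge_x_coords_metres = numpy.append(
        grid_point_x_coords_metres - 0.5 * grid_spacing_metres,
        grid_point_x_coords_metres[-1] + 0.5 * grid_spacing_metres
    )

    # Prints [-5000. 5000. 15000. 25000.]: N points yield N + 1 edges, so
    # each storm object falls in exactly one cell.
    print(grid_edge_x_coords_metres)
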
def _run(prediction_file_names, top_match_dir_name, unique_storm_cells,
         num_hits, num_misses, num_false_alarms, num_correct_nulls,
         num_disagreements, output_dir_names):
    """Finds extreme examples vis-a-vis two models.

    This is effectively the main method.

    :param prediction_file_names: See documentation at top of file.
    :param top_match_dir_name: Same.
    :param unique_storm_cells: Same.
    :param num_hits: Same.
    :param num_misses: Same.
    :param num_false_alarms: Same.
    :param num_correct_nulls: Same.
    :param num_disagreements: Same.
    :param output_dir_names: Same.
    """

    # TODO(thunderhoser): Throw error if multiclass predictions are read.

    # Check input args.
    example_counts = numpy.array([
        num_hits, num_misses, num_false_alarms, num_correct_nulls,
        num_disagreements
    ], dtype=int)

    error_checking.assert_is_geq_numpy_array(example_counts, 0)

    first_output_dir_name = output_dir_names[0]
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=first_output_dir_name)

    second_output_dir_name = output_dir_names[1]
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=second_output_dir_name)

    # Match storm objects between the two prediction files.
    print('Reading data from: "{0:s}"...'.format(prediction_file_names[0]))
    first_prediction_dict = prediction_io.read_ungridded_predictions(
        prediction_file_names[0])

    print('Reading data from: "{0:s}"...'.format(prediction_file_names[1]))
    second_prediction_dict = prediction_io.read_ungridded_predictions(
        prediction_file_names[1])
    print(SEPARATOR_STRING)

    first_prediction_dict, second_prediction_dict = _match_storm_objects(
        first_prediction_dict=first_prediction_dict,
        second_prediction_dict=second_prediction_dict,
        top_match_dir_name=top_match_dir_name)
    print(SEPARATOR_STRING)

    observed_labels = first_prediction_dict[prediction_io.OBSERVED_LABELS_KEY]

    first_model_file_name = first_prediction_dict[
        prediction_io.MODEL_FILE_KEY]
    first_full_id_strings = first_prediction_dict[prediction_io.STORM_IDS_KEY]
    first_storm_times_unix_sec = first_prediction_dict[
        prediction_io.STORM_TIMES_KEY]
    first_probabilities = first_prediction_dict[
        prediction_io.PROBABILITY_MATRIX_KEY][:, 1]

    second_model_file_name = second_prediction_dict[
        prediction_io.MODEL_FILE_KEY]
    second_full_id_strings = second_prediction_dict[
        prediction_io.STORM_IDS_KEY]
    second_storm_times_unix_sec = second_prediction_dict[
        prediction_io.STORM_TIMES_KEY]
    second_probabilities = second_prediction_dict[
        prediction_io.PROBABILITY_MATRIX_KEY][:, 1]

    if num_disagreements > 0:
        second_high_indices, first_high_indices = (
            model_activation.get_hilo_activation_examples(
                storm_activations=second_probabilities - first_probabilities,
                num_low_activation_examples=num_disagreements,
                num_high_activation_examples=num_disagreements,
                unique_storm_cells=unique_storm_cells,
                full_storm_id_strings=first_full_id_strings)
        )

        # Print summary to command window.
        this_mean_diff = numpy.mean(
            second_probabilities[second_high_indices] -
            first_probabilities[second_high_indices]
        )
        print((
            'Average prob difference for {0:d} worst disagreements with '
            'second model higher: {1:.3f}'
        ).format(num_disagreements, this_mean_diff))

        this_mean_diff = numpy.mean(
            second_probabilities[first_high_indices] -
            first_probabilities[first_high_indices]
        )
        print((
            'Average prob difference for {0:d} worst disagreements with '
            'first model higher: {1:.3f}'
        ).format(num_disagreements, this_mean_diff))

        # Write file.
        this_activation_file_name = '{0:s}/low_disagreement_examples.p'.format(
            first_output_dir_name)
        print(('Writing disagreements (second model higher) to: "{0:s}"...'
              ).format(this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            first_probabilities[second_high_indices],
            (len(second_high_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                first_full_id_strings[j] for j in second_high_indices
            ],
            storm_times_unix_sec=first_storm_times_unix_sec[
                second_high_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)

        # Write file.
        this_activation_file_name = (
            '{0:s}/high_disagreement_examples.p'.format(second_output_dir_name)
        )
        print(('Writing disagreements (second model higher) to: "{0:s}"...'
              ).format(this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            second_probabilities[second_high_indices],
            (len(second_high_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                second_full_id_strings[j] for j in second_high_indices
            ],
            storm_times_unix_sec=second_storm_times_unix_sec[
                second_high_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)

        # Write file.
        this_activation_file_name = (
            '{0:s}/high_disagreement_examples.p'.format(first_output_dir_name)
        )
        print(('Writing disagreements (first model higher) to: "{0:s}"...'
              ).format(this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            first_probabilities[first_high_indices],
            (len(first_high_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                first_full_id_strings[j] for j in first_high_indices
            ],
            storm_times_unix_sec=first_storm_times_unix_sec[
                first_high_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)

        # Write file.
        this_activation_file_name = '{0:s}/low_disagreement_examples.p'.format(
            second_output_dir_name)
        print(('Writing disagreements (first model higher) to: "{0:s}"...'
              ).format(this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            second_probabilities[first_high_indices],
            (len(first_high_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                second_full_id_strings[j] for j in first_high_indices
            ],
            storm_times_unix_sec=second_storm_times_unix_sec[
                first_high_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)

    if num_hits + num_misses + num_false_alarms + num_correct_nulls == 0:
        return

    mean_probabilities = 0.5 * (first_probabilities + second_probabilities)

    ct_extreme_dict = model_activation.get_contingency_table_extremes(
        storm_activations=mean_probabilities,
        storm_target_values=observed_labels, num_hits=num_hits,
        num_misses=num_misses, num_false_alarms=num_false_alarms,
        num_correct_nulls=num_correct_nulls,
        unique_storm_cells=unique_storm_cells,
        full_storm_id_strings=first_full_id_strings)

    hit_indices = ct_extreme_dict[model_activation.HIT_INDICES_KEY]
    miss_indices = ct_extreme_dict[model_activation.MISS_INDICES_KEY]
    false_alarm_indices = ct_extreme_dict[
        model_activation.FALSE_ALARM_INDICES_KEY]
    correct_null_indices = ct_extreme_dict[
        model_activation.CORRECT_NULL_INDICES_KEY]

    if num_hits > 0:
        print((
            'Mean probability from first and second model for {0:d} best '
            'hits: {1:.3f}, {2:.3f}'
        ).format(
            num_hits, numpy.mean(first_probabilities[hit_indices]),
            numpy.mean(second_probabilities[hit_indices])
        ))

        this_activation_file_name = '{0:s}/best_hits.p'.format(
            first_output_dir_name)
        print('Writing best hits to: "{0:s}"...'.format(
            this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            first_probabilities[hit_indices], (len(hit_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[first_full_id_strings[j] for j in hit_indices],
            storm_times_unix_sec=first_storm_times_unix_sec[hit_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)

        this_activation_file_name = '{0:s}/best_hits.p'.format(
            second_output_dir_name)
        print('Writing best hits to: "{0:s}"...'.format(
            this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            second_probabilities[hit_indices], (len(hit_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[second_full_id_strings[j] for j in hit_indices],
            storm_times_unix_sec=second_storm_times_unix_sec[hit_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)

    if num_misses > 0:
        print((
            'Mean probability from first and second model for {0:d} worst '
            'misses: {1:.3f}, {2:.3f}'
        ).format(
            num_misses, numpy.mean(first_probabilities[miss_indices]),
            numpy.mean(second_probabilities[miss_indices])
        ))

        this_activation_file_name = '{0:s}/worst_misses.p'.format(
            first_output_dir_name)
        print('Writing worst misses to: "{0:s}"...'.format(
            this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            first_probabilities[miss_indices], (len(miss_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[first_full_id_strings[j] for j in miss_indices],
            storm_times_unix_sec=first_storm_times_unix_sec[miss_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)

        this_activation_file_name = '{0:s}/worst_misses.p'.format(
            second_output_dir_name)
        print('Writing worst misses to: "{0:s}"...'.format(
            this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            second_probabilities[miss_indices], (len(miss_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[second_full_id_strings[j] for j in miss_indices],
            storm_times_unix_sec=second_storm_times_unix_sec[miss_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)

    if num_false_alarms > 0:
        print((
            'Mean probability from first and second model for {0:d} worst '
            'false alarms: {1:.3f}, {2:.3f}'
        ).format(
            num_false_alarms,
            numpy.mean(first_probabilities[false_alarm_indices]),
            numpy.mean(second_probabilities[false_alarm_indices])
        ))

        this_activation_file_name = '{0:s}/worst_false_alarms.p'.format(
            first_output_dir_name)
        print('Writing worst false alarms to: "{0:s}"...'.format(
            this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            first_probabilities[false_alarm_indices],
            (len(false_alarm_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                first_full_id_strings[j] for j in false_alarm_indices
            ],
            storm_times_unix_sec=first_storm_times_unix_sec[
                false_alarm_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)

        this_activation_file_name = '{0:s}/worst_false_alarms.p'.format(
            second_output_dir_name)
        print('Writing worst false alarms to: "{0:s}"...'.format(
            this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            second_probabilities[false_alarm_indices],
            (len(false_alarm_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                second_full_id_strings[j] for j in false_alarm_indices
            ],
            storm_times_unix_sec=second_storm_times_unix_sec[
                false_alarm_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)

    if num_correct_nulls > 0:
        print((
            'Mean probability from first and second model for {0:d} best '
            'correct nulls: {1:.3f}, {2:.3f}'
        ).format(
            num_correct_nulls,
            numpy.mean(first_probabilities[correct_null_indices]),
            numpy.mean(second_probabilities[correct_null_indices])
        ))

        this_activation_file_name = '{0:s}/best_correct_nulls.p'.format(
            first_output_dir_name)
        print('Writing best correct nulls to: "{0:s}"...'.format(
            this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            first_probabilities[correct_null_indices],
            (len(correct_null_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                first_full_id_strings[j] for j in correct_null_indices
            ],
            storm_times_unix_sec=first_storm_times_unix_sec[
                correct_null_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)

        this_activation_file_name = '{0:s}/best_correct_nulls.p'.format(
            second_output_dir_name)
        print('Writing best correct nulls to: "{0:s}"...'.format(
            this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            second_probabilities[correct_null_indices],
            (len(correct_null_indices), 1)
        )
        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                second_full_id_strings[j] for j in correct_null_indices
            ],
            storm_times_unix_sec=second_storm_times_unix_sec[
                correct_null_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING, target_class=1)
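
# Hypothetical sketch (not the actual model_activation implementation):
# conceptually, `get_hilo_activation_examples` ranks the activation values
# (here, probability differences: second model minus first) and returns the
# indices of the highest and lowest values. High differences are storm
# objects where the second model is more confident; the most negative ones
# are where the first model is more confident. All values below are made up.
def _demo_disagreement_ranking():
    """Minimal stand-in for the hi/lo ranking assumed above."""
    import numpy

    first_probabilities = numpy.array([0.9, 0.2, 0.5, 0.1])
    second_probabilities = numpy.array([0.1, 0.8, 0.5, 0.9])
    differences = second_probabilities - first_probabilities

    num_disagreements = 1
    high_indices = numpy.argsort(differences)[::-1][:num_disagreements]
    low_indices = numpy.argsort(differences)[:num_disagreements]

    print(high_indices)  # [3]: second model much higher (0.9 vs. 0.1).
    print(low_indices)   # [0]: first model much higher (0.9 vs. 0.1).
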
def _run(prediction_file_name, best_prob_threshold, upgraded_min_ef_rating,
         top_target_dir_name, num_bootstrap_reps, downsampling_fractions,
         output_dir_name):
    """Evaluates CNN predictions.

    This is effectively the main method.

    :param prediction_file_name: See documentation at top of file.
    :param best_prob_threshold: Same.
    :param upgraded_min_ef_rating: Same.
    :param top_target_dir_name: Same.
    :param num_bootstrap_reps: Same.
    :param downsampling_fractions: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if file contains no examples (storm objects).
    :raises: ValueError: if file contains multi-class predictions.
    :raises: ValueError: if you try to upgrade minimum EF rating but the
        original is non-zero.
    """

    # Verify and process input args.
    if upgraded_min_ef_rating <= 0:
        upgraded_min_ef_rating = None

    num_bootstrap_reps = max([num_bootstrap_reps, 1])

    if best_prob_threshold < 0:
        best_prob_threshold = None

    # Read predictions.
    print('Reading data from: "{0:s}"...'.format(prediction_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(
        prediction_file_name)

    observed_labels = prediction_dict[prediction_io.OBSERVED_LABELS_KEY]
    class_probability_matrix = (
        prediction_dict[prediction_io.PROBABILITY_MATRIX_KEY]
    )

    num_examples = len(observed_labels)
    num_classes = class_probability_matrix.shape[1]

    if num_examples == 0:
        raise ValueError('File contains no examples (storm objects).')

    if num_classes > 2:
        error_string = (
            'This script handles only binary, not {0:d}-class, '
            'classification.'
        ).format(num_classes)

        raise ValueError(error_string)

    forecast_probabilities = class_probability_matrix[:, -1]

    # If necessary, upgrade minimum EF rating.
    if upgraded_min_ef_rating is not None:
        target_param_dict = target_val_utils.target_name_to_params(
            prediction_dict[prediction_io.TARGET_NAME_KEY])
        orig_min_ef_rating = (
            target_param_dict[target_val_utils.MIN_FUJITA_RATING_KEY]
        )

        if orig_min_ef_rating != 0:
            error_string = (
                'Cannot upgrade minimum EF rating when original min rating '
                'is non-zero (in this case it is {0:d}).'
            ).format(orig_min_ef_rating)

            raise ValueError(error_string)

        new_target_name = target_val_utils.target_params_to_name(
            min_lead_time_sec=target_param_dict[
                target_val_utils.MIN_LEAD_TIME_KEY],
            max_lead_time_sec=target_param_dict[
                target_val_utils.MAX_LEAD_TIME_KEY],
            min_link_distance_metres=target_param_dict[
                target_val_utils.MIN_LINKAGE_DISTANCE_KEY],
            max_link_distance_metres=target_param_dict[
                target_val_utils.MAX_LINKAGE_DISTANCE_KEY],
            tornadogenesis_only=(
                target_param_dict[target_val_utils.EVENT_TYPE_KEY] ==
                linkage.TORNADOGENESIS_EVENT_STRING
            ),
            min_fujita_rating=upgraded_min_ef_rating)

        print(SEPARATOR_STRING)
        observed_labels = _read_new_target_values(
            top_target_dir_name=top_target_dir_name,
            new_target_name=new_target_name,
            full_storm_id_strings=prediction_dict[
                prediction_io.STORM_IDS_KEY],
            storm_times_unix_sec=prediction_dict[
                prediction_io.STORM_TIMES_KEY],
            orig_target_values=observed_labels)
        print(SEPARATOR_STRING)

        good_indices = numpy.where(observed_labels >= 0)[0]
        observed_labels = observed_labels[good_indices]
        forecast_probabilities = forecast_probabilities[good_indices]

    # Do calculations.
    output_file_name = model_eval.find_file_from_prediction_file(
        input_prediction_file_name=prediction_file_name,
        output_dir_name=output_dir_name, raise_error_if_missing=False)
    file_system_utils.mkdir_recursive_if_necessary(
        file_name=output_file_name)

    if numpy.any(downsampling_fractions <= 0):
        downsampling_dict = None
    else:
        downsampling_dict = {
            0: downsampling_fractions[0],
            1: downsampling_fractions[1]
        }

    _compute_scores(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        num_bootstrap_reps=num_bootstrap_reps,
        best_prob_threshold=best_prob_threshold,
        downsampling_dict=downsampling_dict,
        output_file_name=output_file_name)
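
# Hedged sketch of the downsampling logic in `_run` above:
# `downsampling_fractions` is a length-2 array giving the sampling fraction
# for class 0 and class 1, and any non-positive entry disables downsampling
# entirely. The fraction values below are made up.
def _demo_downsampling_dict():
    """Shows how `_run` converts downsampling fractions to a dict."""
    import numpy

    downsampling_fractions = numpy.array([0.9, 0.1])

    if numpy.any(downsampling_fractions <= 0):
        downsampling_dict = None
    else:
        downsampling_dict = {
            0: downsampling_fractions[0],
            1: downsampling_fractions[1]
        }

    # Prints the per-class fractions, e.g. {0: 0.9, 1: 0.1}.
    print(downsampling_dict)
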
def _run(input_prediction_file_name, top_tracking_dir_name,
         tracking_scale_metres2, x_spacing_metres, y_spacing_metres,
         effective_radius_metres, smoothing_method_name,
         smoothing_cutoff_radius_metres, smoothing_efold_radius_metres,
         top_output_dir_name):
    """Projects CNN forecasts onto the RAP grid.

    This is effectively the main method.

    :param input_prediction_file_name: See documentation at top of file.
    :param top_tracking_dir_name: Same.
    :param tracking_scale_metres2: Same.
    :param x_spacing_metres: Same.
    :param y_spacing_metres: Same.
    :param effective_radius_metres: Same.
    :param smoothing_method_name: Same.
    :param smoothing_cutoff_radius_metres: Same.
    :param smoothing_efold_radius_metres: Same.
    :param top_output_dir_name: Same.
    """

    print('Reading data from: "{0:s}"...'.format(input_prediction_file_name))
    ungridded_forecast_dict = prediction_io.read_ungridded_predictions(
        input_prediction_file_name)

    target_param_dict = target_val_utils.target_name_to_params(
        ungridded_forecast_dict[prediction_io.TARGET_NAME_KEY])

    min_buffer_dist_metres = target_param_dict[
        target_val_utils.MIN_LINKAGE_DISTANCE_KEY]

    # TODO(thunderhoser): This is HACKY.
    if min_buffer_dist_metres == 0:
        min_buffer_dist_metres = numpy.nan

    max_buffer_dist_metres = target_param_dict[
        target_val_utils.MAX_LINKAGE_DISTANCE_KEY]
    min_lead_time_seconds = target_param_dict[
        target_val_utils.MIN_LEAD_TIME_KEY]
    max_lead_time_seconds = target_param_dict[
        target_val_utils.MAX_LEAD_TIME_KEY]

    forecast_column_name = gridded_forecasts._buffer_to_column_name(
        min_buffer_dist_metres=min_buffer_dist_metres,
        max_buffer_dist_metres=max_buffer_dist_metres,
        column_type=gridded_forecasts.FORECAST_COLUMN_TYPE)

    init_times_unix_sec = numpy.unique(
        ungridded_forecast_dict[prediction_io.STORM_TIMES_KEY])

    tracking_file_names = []

    for this_time_unix_sec in init_times_unix_sec:
        this_tracking_file_name = tracking_io.find_file(
            top_tracking_dir_name=top_tracking_dir_name,
            tracking_scale_metres2=tracking_scale_metres2,
            source_name=tracking_utils.SEGMOTION_NAME,
            valid_time_unix_sec=this_time_unix_sec,
            spc_date_string=time_conversion.time_to_spc_date_string(
                this_time_unix_sec),
            raise_error_if_missing=True)

        tracking_file_names.append(this_tracking_file_name)

    storm_object_table = tracking_io.read_many_files(tracking_file_names)
    print(SEPARATOR_STRING)

    tracking_utils.find_storm_objects(
        all_id_strings=ungridded_forecast_dict[prediction_io.STORM_IDS_KEY],
        all_times_unix_sec=ungridded_forecast_dict[
            prediction_io.STORM_TIMES_KEY],
        id_strings_to_keep=storm_object_table[
            tracking_utils.FULL_ID_COLUMN].values.tolist(),
        times_to_keep_unix_sec=storm_object_table[
            tracking_utils.VALID_TIME_COLUMN].values,
        allow_missing=False)

    sort_indices = tracking_utils.find_storm_objects(
        all_id_strings=storm_object_table[
            tracking_utils.FULL_ID_COLUMN].values.tolist(),
        all_times_unix_sec=storm_object_table[
            tracking_utils.VALID_TIME_COLUMN].values,
        id_strings_to_keep=ungridded_forecast_dict[
            prediction_io.STORM_IDS_KEY],
        times_to_keep_unix_sec=ungridded_forecast_dict[
            prediction_io.STORM_TIMES_KEY],
        allow_missing=False)

    forecast_probabilities = ungridded_forecast_dict[
        prediction_io.PROBABILITY_MATRIX_KEY][sort_indices, 1]

    storm_object_table = storm_object_table.assign(
        **{forecast_column_name: forecast_probabilities}
    )

    gridded_forecast_dict = gridded_forecasts.create_forecast_grids(
        storm_object_table=storm_object_table,
        min_lead_time_sec=min_lead_time_seconds,
        max_lead_time_sec=max_lead_time_seconds,
        lead_time_resolution_sec=
        gridded_forecasts.DEFAULT_LEAD_TIME_RES_SECONDS,
        grid_spacing_x_metres=x_spacing_metres,
        grid_spacing_y_metres=y_spacing_metres,
        interp_to_latlng_grid=False,
        prob_radius_for_grid_metres=effective_radius_metres,
        smoothing_method=smoothing_method_name,
        smoothing_e_folding_radius_metres=smoothing_efold_radius_metres,
        smoothing_cutoff_radius_metres=smoothing_cutoff_radius_metres)

    print(SEPARATOR_STRING)

    output_file_name = prediction_io.find_file(
        top_prediction_dir_name=top_output_dir_name,
        first_init_time_unix_sec=numpy.min(
            storm_object_table[tracking_utils.VALID_TIME_COLUMN].values),
        last_init_time_unix_sec=numpy.max(
            storm_object_table[tracking_utils.VALID_TIME_COLUMN].values),
        gridded=True, raise_error_if_missing=False)

    print((
        'Writing results (forecast grids for {0:d} initial times) to: '
        '"{1:s}"...'
    ).format(
        len(gridded_forecast_dict[prediction_io.INIT_TIMES_KEY]),
        output_file_name
    ))

    prediction_io.write_gridded_predictions(
        gridded_forecast_dict=gridded_forecast_dict,
        pickle_file_name=output_file_name)
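
# Hypothetical sketch (not the actual tracking_utils implementation): the
# `find_storm_objects` calls above match storm objects by full ID and valid
# time, returning indices into one set of arrays for each object in the
# other set. Those indices let `_run` attach each forecast probability to
# the matching row of the storm-object table. The IDs below are made up,
# and matching is shown on ID only for brevity.
def _demo_storm_object_matching():
    """Minimal stand-in for ID-based storm-object matching."""
    all_id_strings = ['storm_b', 'storm_a', 'storm_c']
    id_strings_to_keep = ['storm_a', 'storm_b', 'storm_c']

    # For each ID to keep, find its position in the full list.
    match_indices = [all_id_strings.index(s) for s in id_strings_to_keep]
    print(match_indices)  # [1, 0, 2]
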
def _run(input_file_name, num_months_per_chunk, num_hours_per_chunk,
         output_dir_name):
    """Subsets ungridded predictions by time.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param num_months_per_chunk: Same.
    :param num_hours_per_chunk: Same.
    :param output_dir_name: Same.
    """

    if num_months_per_chunk > 0:
        chunk_to_months_dict = temporal_subsetting.get_monthly_chunks(
            num_months_per_chunk=num_months_per_chunk, verbose=True)

        num_monthly_chunks = len(chunk_to_months_dict.keys())
        print(SEPARATOR_STRING)
    else:
        num_monthly_chunks = 0

    if num_hours_per_chunk > 0:
        chunk_to_hours_dict = temporal_subsetting.get_hourly_chunks(
            num_hours_per_chunk=num_hours_per_chunk, verbose=True)

        num_hourly_chunks = len(chunk_to_hours_dict.keys())
        print(SEPARATOR_STRING)
    else:
        num_hourly_chunks = 0

    print('Reading input data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(input_file_name)
    storm_times_unix_sec = prediction_dict[prediction_io.STORM_TIMES_KEY]

    storm_months = None

    for i in range(num_monthly_chunks):
        these_storm_indices, storm_months = (
            temporal_subsetting.get_events_in_months(
                event_months=storm_months,
                event_times_unix_sec=storm_times_unix_sec,
                desired_months=chunk_to_months_dict[i], verbose=True)
        )

        this_prediction_dict = prediction_io.subset_ungridded_predictions(
            prediction_dict=prediction_dict,
            desired_storm_indices=these_storm_indices)

        this_output_file_name = prediction_io.find_ungridded_file(
            directory_name=output_dir_name,
            months_in_subset=chunk_to_months_dict[i],
            raise_error_if_missing=False)

        print('Writing temporal subset to: "{0:s}"...'.format(
            this_output_file_name))

        prediction_io.write_ungridded_predictions(
            netcdf_file_name=this_output_file_name,
            class_probability_matrix=this_prediction_dict[
                prediction_io.PROBABILITY_MATRIX_KEY],
            storm_ids=this_prediction_dict[prediction_io.STORM_IDS_KEY],
            storm_times_unix_sec=this_prediction_dict[
                prediction_io.STORM_TIMES_KEY],
            observed_labels=this_prediction_dict[
                prediction_io.OBSERVED_LABELS_KEY],
            target_name=this_prediction_dict[prediction_io.TARGET_NAME_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY]
        )

        print(SEPARATOR_STRING)

    storm_hours = None

    for i in range(num_hourly_chunks):
        these_storm_indices, storm_hours = (
            temporal_subsetting.get_events_in_hours(
                event_hours=storm_hours,
                event_times_unix_sec=storm_times_unix_sec,
                desired_hours=chunk_to_hours_dict[i], verbose=True)
        )

        if len(these_storm_indices) == 0:
            continue

        this_prediction_dict = prediction_io.subset_ungridded_predictions(
            prediction_dict=prediction_dict,
            desired_storm_indices=these_storm_indices)

        this_output_file_name = prediction_io.find_ungridded_file(
            directory_name=output_dir_name,
            hours_in_subset=chunk_to_hours_dict[i],
            raise_error_if_missing=False)

        print('Writing temporal subset to: "{0:s}"...'.format(
            this_output_file_name))

        prediction_io.write_ungridded_predictions(
            netcdf_file_name=this_output_file_name,
            class_probability_matrix=this_prediction_dict[
                prediction_io.PROBABILITY_MATRIX_KEY],
            storm_ids=this_prediction_dict[prediction_io.STORM_IDS_KEY],
            storm_times_unix_sec=this_prediction_dict[
                prediction_io.STORM_TIMES_KEY],
            observed_labels=this_prediction_dict[
                prediction_io.OBSERVED_LABELS_KEY],
            target_name=this_prediction_dict[prediction_io.TARGET_NAME_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY]
        )

        if i != num_hourly_chunks - 1:
            print(SEPARATOR_STRING)
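
# Hypothetical sketch (the internals of temporal_subsetting are not shown
# here): with num_months_per_chunk = 3, the chunk-to-months dict plausibly
# maps each chunk index to a group of consecutive months, e.g.
# {0: [1, 2, 3], 1: [4, 5, 6], 2: [7, 8, 9], 3: [10, 11, 12]}. The actual
# library may group months differently (e.g. by meteorological season).
def _demo_monthly_chunks():
    """Minimal stand-in for the monthly chunking assumed above."""
    num_months_per_chunk = 3
    all_months = list(range(1, 13))

    chunk_to_months_dict = {
        i: all_months[i * num_months_per_chunk:(i + 1) * num_months_per_chunk]
        for i in range(len(all_months) // num_months_per_chunk)
    }
    print(chunk_to_months_dict)
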
def _run(prediction_file_name, top_tracking_dir_name, prob_threshold,
         grid_spacing_metres, output_dir_name):
    """Plots spatial distribution of false alarms.

    This is effectively the main method.

    :param prediction_file_name: See documentation at top of file.
    :param top_tracking_dir_name: Same.
    :param prob_threshold: Same.
    :param grid_spacing_metres: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    error_checking.assert_is_greater(prob_threshold, 0.)
    error_checking.assert_is_less_than(prob_threshold, 1.)

    grid_metadata_dict = grids.create_equidistant_grid(
        min_latitude_deg=MIN_LATITUDE_DEG, max_latitude_deg=MAX_LATITUDE_DEG,
        min_longitude_deg=MIN_LONGITUDE_DEG,
        max_longitude_deg=MAX_LONGITUDE_DEG,
        x_spacing_metres=grid_spacing_metres,
        y_spacing_metres=grid_spacing_metres, azimuthal=False)

    # Read predictions and find positive forecasts and false alarms.
    print('Reading predictions from: "{0:s}"...'.format(prediction_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(
        prediction_file_name)

    observed_labels = prediction_dict[prediction_io.OBSERVED_LABELS_KEY]
    forecast_labels = (
        prediction_dict[prediction_io.PROBABILITY_MATRIX_KEY][:, -1] >=
        prob_threshold
    ).astype(int)

    pos_forecast_indices = numpy.where(forecast_labels == 1)[0]
    false_alarm_indices = numpy.where(numpy.logical_and(
        observed_labels == 0, forecast_labels == 1
    ))[0]

    num_examples = len(observed_labels)
    num_positive_forecasts = len(pos_forecast_indices)
    num_false_alarms = len(false_alarm_indices)

    print((
        'Probability threshold = {0:.3f} ... number of examples, positive '
        'forecasts, false alarms = {1:d}, {2:d}, {3:d}'
    ).format(
        prob_threshold, num_examples, num_positive_forecasts, num_false_alarms
    ))

    # Find and read tracking files.
    pos_forecast_id_strings = [
        prediction_dict[prediction_io.STORM_IDS_KEY][k]
        for k in pos_forecast_indices
    ]
    pos_forecast_times_unix_sec = (
        prediction_dict[prediction_io.STORM_TIMES_KEY][pos_forecast_indices]
    )

    file_times_unix_sec = numpy.unique(pos_forecast_times_unix_sec)
    num_files = len(file_times_unix_sec)
    storm_object_tables = [None] * num_files

    print(SEPARATOR_STRING)

    for i in range(num_files):
        this_tracking_file_name = tracking_io.find_file(
            top_tracking_dir_name=top_tracking_dir_name,
            tracking_scale_metres2=DUMMY_TRACKING_SCALE_METRES2,
            source_name=tracking_utils.SEGMOTION_NAME,
            valid_time_unix_sec=file_times_unix_sec[i],
            spc_date_string=time_conversion.time_to_spc_date_string(
                file_times_unix_sec[i]),
            raise_error_if_missing=True)

        print('Reading data from: "{0:s}"...'.format(this_tracking_file_name))
        this_table = tracking_io.read_file(this_tracking_file_name)

        storm_object_tables[i] = this_table.loc[
            this_table[tracking_utils.FULL_ID_COLUMN].isin(
                pos_forecast_id_strings)
        ]

        if i == 0:
            continue

        storm_object_tables[i] = storm_object_tables[i].align(
            storm_object_tables[0], axis=1
        )[0]

    storm_object_table = pandas.concat(
        storm_object_tables, axis=0, ignore_index=True)
    print(SEPARATOR_STRING)

    # Find latitudes and longitudes of false alarms.
    all_id_strings = (
        storm_object_table[tracking_utils.FULL_ID_COLUMN].values.tolist()
    )
    all_times_unix_sec = (
        storm_object_table[tracking_utils.VALID_TIME_COLUMN].values
    )

    good_indices = tracking_utils.find_storm_objects(
        all_id_strings=all_id_strings, all_times_unix_sec=all_times_unix_sec,
        id_strings_to_keep=pos_forecast_id_strings,
        times_to_keep_unix_sec=pos_forecast_times_unix_sec,
        allow_missing=False)

    pos_forecast_latitudes_deg = storm_object_table[
        tracking_utils.CENTROID_LATITUDE_COLUMN].values[good_indices]
    pos_forecast_longitudes_deg = storm_object_table[
        tracking_utils.CENTROID_LONGITUDE_COLUMN].values[good_indices]

    false_alarm_id_strings = [
        prediction_dict[prediction_io.STORM_IDS_KEY][k]
        for k in false_alarm_indices
    ]
    false_alarm_times_unix_sec = (
        prediction_dict[prediction_io.STORM_TIMES_KEY][false_alarm_indices]
    )

    good_indices = tracking_utils.find_storm_objects(
        all_id_strings=all_id_strings, all_times_unix_sec=all_times_unix_sec,
        id_strings_to_keep=false_alarm_id_strings,
        times_to_keep_unix_sec=false_alarm_times_unix_sec,
        allow_missing=False)

    false_alarm_latitudes_deg = storm_object_table[
        tracking_utils.CENTROID_LATITUDE_COLUMN].values[good_indices]
    false_alarm_longitudes_deg = storm_object_table[
        tracking_utils.CENTROID_LONGITUDE_COLUMN].values[good_indices]

    pos_forecast_x_coords_metres, pos_forecast_y_coords_metres = (
        projections.project_latlng_to_xy(
            latitudes_deg=pos_forecast_latitudes_deg,
            longitudes_deg=pos_forecast_longitudes_deg,
            projection_object=grid_metadata_dict[grids.PROJECTION_KEY])
    )

    num_pos_forecasts_matrix = grids.count_events_on_equidistant_grid(
        event_x_coords_metres=pos_forecast_x_coords_metres,
        event_y_coords_metres=pos_forecast_y_coords_metres,
        grid_point_x_coords_metres=grid_metadata_dict[grids.X_COORDS_KEY],
        grid_point_y_coords_metres=grid_metadata_dict[grids.Y_COORDS_KEY]
    )[0]
    print(SEPARATOR_STRING)

    false_alarm_x_coords_metres, false_alarm_y_coords_metres = (
        projections.project_latlng_to_xy(
            latitudes_deg=false_alarm_latitudes_deg,
            longitudes_deg=false_alarm_longitudes_deg,
            projection_object=grid_metadata_dict[grids.PROJECTION_KEY])
    )

    num_false_alarms_matrix = grids.count_events_on_equidistant_grid(
        event_x_coords_metres=false_alarm_x_coords_metres,
        event_y_coords_metres=false_alarm_y_coords_metres,
        grid_point_x_coords_metres=grid_metadata_dict[grids.X_COORDS_KEY],
        grid_point_y_coords_metres=grid_metadata_dict[grids.Y_COORDS_KEY]
    )[0]
    print(SEPARATOR_STRING)

    num_pos_forecasts_matrix = num_pos_forecasts_matrix.astype(float)
    num_pos_forecasts_matrix[num_pos_forecasts_matrix == 0] = numpy.nan

    num_false_alarms_matrix = num_false_alarms_matrix.astype(float)
    num_false_alarms_matrix[num_false_alarms_matrix == 0] = numpy.nan

    far_matrix = num_false_alarms_matrix / num_pos_forecasts_matrix

    this_max_value = numpy.nanpercentile(
        num_false_alarms_matrix, MAX_COUNT_PERCENTILE_TO_PLOT)
    if this_max_value < 10:
        this_max_value = numpy.nanmax(num_false_alarms_matrix)

    figure_object = plotter._plot_one_value(
        data_matrix=num_false_alarms_matrix,
        grid_metadata_dict=grid_metadata_dict,
        colour_map_object=CMAP_OBJECT_FOR_COUNTS,
        min_colour_value=0, max_colour_value=this_max_value,
        plot_cbar_min_arrow=False, plot_cbar_max_arrow=True
    )[0]

    num_false_alarms_file_name = '{0:s}/num_false_alarms.jpg'.format(
        output_dir_name)

    print('Saving figure to: "{0:s}"...'.format(num_false_alarms_file_name))
    figure_object.savefig(
        num_false_alarms_file_name, dpi=FIGURE_RESOLUTION_DPI,
        pad_inches=0, bbox_inches='tight')
    pyplot.close(figure_object)

    this_max_value = numpy.nanpercentile(
        num_pos_forecasts_matrix, MAX_COUNT_PERCENTILE_TO_PLOT)
    if this_max_value < 10:
        this_max_value = numpy.nanmax(num_pos_forecasts_matrix)

    figure_object = plotter._plot_one_value(
        data_matrix=num_pos_forecasts_matrix,
        grid_metadata_dict=grid_metadata_dict,
        colour_map_object=CMAP_OBJECT_FOR_COUNTS,
        min_colour_value=0, max_colour_value=this_max_value,
        plot_cbar_min_arrow=False, plot_cbar_max_arrow=True
    )[0]

    num_pos_forecasts_file_name = '{0:s}/num_positive_forecasts.jpg'.format(
        output_dir_name)

    print('Saving figure to: "{0:s}"...'.format(num_pos_forecasts_file_name))
    figure_object.savefig(
        num_pos_forecasts_file_name, dpi=FIGURE_RESOLUTION_DPI,
        pad_inches=0, bbox_inches='tight')
    pyplot.close(figure_object)

    this_max_value = numpy.nanpercentile(
        far_matrix, MAX_FAR_PERCENTILE_TO_PLOT)
    this_min_value = numpy.nanpercentile(
        far_matrix, 100. - MAX_FAR_PERCENTILE_TO_PLOT)

    figure_object = plotter._plot_one_value(
        data_matrix=far_matrix, grid_metadata_dict=grid_metadata_dict,
        colour_map_object=CMAP_OBJECT_FOR_FAR,
        min_colour_value=this_min_value, max_colour_value=this_max_value,
        plot_cbar_min_arrow=this_min_value > 0.,
        plot_cbar_max_arrow=this_max_value < 1.
    )[0]

    far_file_name = '{0:s}/false_alarm_ratio.jpg'.format(output_dir_name)

    print('Saving figure to: "{0:s}"...'.format(far_file_name))
    figure_object.savefig(
        far_file_name, dpi=FIGURE_RESOLUTION_DPI, pad_inches=0,
        bbox_inches='tight')
    pyplot.close(figure_object)
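
# Sketch of the gridded false-alarm-ratio computation in `_run` above, with
# made-up counts: cells with a zero count are set to NaN before the
# element-wise division, so FAR is undefined (rather than zero or infinite)
# wherever the model never made a positive forecast.
def _demo_far_matrix():
    """Shows the NaN masking and element-wise division used in `_run`."""
    import numpy

    num_pos_forecasts_matrix = numpy.array([[4., 0.], [2., 5.]])
    num_false_alarms_matrix = numpy.array([[1., 0.], [0., 5.]])

    num_pos_forecasts_matrix[num_pos_forecasts_matrix == 0] = numpy.nan
    num_false_alarms_matrix[num_false_alarms_matrix == 0] = numpy.nan

    far_matrix = num_false_alarms_matrix / num_pos_forecasts_matrix
    print(far_matrix)  # [[0.25 nan] [nan 1.]]
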